{AVMOVQ, C_ELEM, C_NONE, C_NONE, C_ARNG, C_NONE, 45, 4, 0, 0},
- {APRELD, C_SOREG, C_U5CON, C_NONE, C_NONE, C_NONE, 46, 4, 0, 0},
- {APRELDX, C_SOREG, C_DCON, C_U5CON, C_NONE, C_NONE, 47, 20, 0, 0},
+ {AVMOVQ, C_SOREG, C_NONE, C_NONE, C_ARNG, C_NONE, 46, 4, 0, 0},
+ {AXVMOVQ, C_SOREG, C_NONE, C_NONE, C_ARNG, C_NONE, 46, 4, 0, 0},
+
+ {APRELD, C_SOREG, C_U5CON, C_NONE, C_NONE, C_NONE, 47, 4, 0, 0},
+ {APRELDX, C_SOREG, C_DCON, C_U5CON, C_NONE, C_NONE, 48, 20, 0, 0},
{obj.APCALIGN, C_U12CON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
{obj.APCDATA, C_32CON, C_NONE, C_NONE, C_32CON, C_NONE, 0, 0, 0, 0},
o1 = uint32(c.regoff(&p.From))
case 39: // vmov Rn, Vd.<T>[index]
- v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
o1 = v | (index << 10) | (Rj << 5) | Vd
case 40: // vmov Vd.<T>[index], Rn
- v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
o1 = v | (index << 10) | (Vj << 5) | Rd
case 41: // vmov Rn, Vd.<T>
- v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
o1 = v | (Rj << 5) | Vd
case 42: // vmov xj, xd.<T>
- v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
o1 = v | (Xj << 5) | Xd
case 43: // vmov xj, xd.<T>[index]
- v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
o1 = v | (index << 10) | (Xj << 5) | Xd
case 44: // vmov xj.<T>[index], xd
- v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
o1 = v | (index << 10) | (Xj << 5) | Xd
case 45: // vmov vj.<T>[index], vd.<T>
- v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
+ v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
if v == 0 {
c.ctxt.Diag("illegal arng type combination: %v\n", p)
}
c.checkindex(p, index, m)
o1 = v | (index << 10) | (vj << 5) | vd
- case 46: // preld offset(Rbase), $hint
+ case 46: // vmov offset(vj), vd.<T>
+ v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, true)
+ if v == 0 {
+ c.ctxt.Diag("illegal arng type combination: %v\n", p)
+ }
+
+ si := c.regoff(&p.From)
+ Rj := uint32(p.From.Reg & EXT_REG_MASK)
+ Vd := uint32(p.To.Reg & EXT_REG_MASK)
+ o1 = v | uint32(si<<10) | (Rj << 5) | Vd
+
+ case 47: // preld offset(Rbase), $hint
offs := c.regoff(&p.From)
hint := p.GetFrom3().Offset
o1 = OP_12IR_5I(c.opiir(p.As), uint32(offs), uint32(p.From.Reg), uint32(hint))
- case 47: // preldx offset(Rbase), $n, $hint
+ case 48: // preldx offset(Rbase), $n, $hint
offs := c.regoff(&p.From)
hint := p.RestArgs[1].Offset
n := uint64(p.GetFrom3().Offset)
return 0
}
-func (c *ctxt0) specialLsxMovInst(a obj.As, fReg, tReg int16) (op_code, index_mask uint32) {
+func (c *ctxt0) specialLsxMovInst(a obj.As, fReg, tReg int16, offset_flag bool) (op_code, index_mask uint32) {
farng := (fReg >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK
tarng := (tReg >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK
fclass := c.rclass(fReg)
}
case C_REG | (C_ARNG << 16):
- // vmov Rn, Vd.<T>
- switch a {
- case AVMOVQ:
- switch tarng {
- case ARNG_16B:
- return (0x1CA7C0 << 10), 0x0 // vreplgr2vr.b
- case ARNG_8H:
- return (0x1CA7C1 << 10), 0x0 // vreplgr2vr.h
- case ARNG_4W:
- return (0x1CA7C2 << 10), 0x0 // vreplgr2vr.w
- case ARNG_2V:
- return (0x1CA7C3 << 10), 0x0 // vreplgr2vr.d
+ switch {
+ case offset_flag:
+ // vmov offset(vj), vd.<T>
+ switch a {
+ case AVMOVQ:
+ switch tarng {
+ case ARNG_16B:
+ return (0xC2 << 22), 0x0 // vldrepl.b
+ case ARNG_8H:
+ return (0x182 << 21), 0x0 // vldrepl.h
+ case ARNG_4W:
+ return (0x302 << 20), 0x0 // vldrepl.w
+ case ARNG_2V:
+ return (0x602 << 19), 0x0 // vldrepl.d
+ }
+ case AXVMOVQ:
+ switch tarng {
+ case ARNG_32B:
+ return (0xCA << 22), 0x0 // xvldrepl.b
+ case ARNG_16H:
+ return (0x192 << 21), 0x0 // xvldrepl.h
+ case ARNG_8W:
+ return (0x322 << 20), 0x0 // xvldrepl.w
+ case ARNG_4V:
+ return (0x642 << 19), 0x0 // xvldrepl.d
+ }
}
- case AXVMOVQ:
- switch tarng {
- case ARNG_32B:
- return (0x1DA7C0 << 10), 0x0 // xvreplgr2vr.b
- case ARNG_16H:
- return (0x1DA7C1 << 10), 0x0 // xvreplgr2vr.h
- case ARNG_8W:
- return (0x1DA7C2 << 10), 0x0 // xvreplgr2vr.w
- case ARNG_4V:
- return (0x1DA7C3 << 10), 0x0 // xvreplgr2vr.d
+ default:
+ // vmov Rn, Vd.<T>
+ switch a {
+ case AVMOVQ:
+ switch tarng {
+ case ARNG_16B:
+ return (0x1CA7C0 << 10), 0x0 // vreplgr2vr.b
+ case ARNG_8H:
+ return (0x1CA7C1 << 10), 0x0 // vreplgr2vr.h
+ case ARNG_4W:
+ return (0x1CA7C2 << 10), 0x0 // vreplgr2vr.w
+ case ARNG_2V:
+ return (0x1CA7C3 << 10), 0x0 // vreplgr2vr.d
+ }
+ case AXVMOVQ:
+ switch tarng {
+ case ARNG_32B:
+ return (0x1DA7C0 << 10), 0x0 // xvreplgr2vr.b
+ case ARNG_16H:
+ return (0x1DA7C1 << 10), 0x0 // xvreplgr2vr.h
+ case ARNG_8W:
+ return (0x1DA7C2 << 10), 0x0 // xvreplgr2vr.w
+ case ARNG_4V:
+ return (0x1DA7C3 << 10), 0x0 // xvreplgr2vr.d
+ }
}
}
VMOVQ Vj.W[index], Vd.W4 | vreplvei.w vd, vj, ui2 | for i in range(4) : VR[vd].w[i] = VR[vj].w[ui2]
VMOVQ Vj.V[index], Vd.V2 | vreplvei.d vd, vj, ui1 | for i in range(2) : VR[vd].d[i] = VR[vj].d[ui1]
+3.7 Load data from memory and broadcast to each element of a vector register.
+
+ Instruction format:
+ VMOVQ offset(Rj), <Vd>.<T>
+
+ Mapping between Go and platform assembly:
+ Go assembly | platform assembly | semantics
+ -------------------------------------------------------------------------------------------------------------------------------------------------------
+ VMOVQ offset(Rj), Vd.B16 | vldrepl.b Vd, Rj, si12 | for i in range(16): VR[vd].b[i] = load 8 bit memory data from (GR[rj]+SignExtend(si12))
+ VMOVQ offset(Rj), Vd.H8 | vldrepl.h Vd, Rj, si11 | for i in range(8) : VR[vd].h[i] = load 16 bit memory data from (GR[rj]+SignExtend(si11<<1))
+ VMOVQ offset(Rj), Vd.W4 | vldrepl.w Vd, Rj, si10 | for i in range(4) : VR[vd].w[i] = load 32 bit memory data from (GR[rj]+SignExtend(si10<<2))
+ VMOVQ offset(Rj), Vd.V2 | vldrepl.d Vd, Rj, si9 | for i in range(2) : VR[vd].d[i] = load 64 bit memory data from (GR[rj]+SignExtend(si9<<3))
+ XVMOVQ offset(Rj), Xd.B32 | xvldrepl.b Xd, Rj, si12 | for i in range(32): XR[xd].b[i] = load 8 bit memory data from (GR[rj]+SignExtend(si12))
+ XVMOVQ offset(Rj), Xd.H16 | xvldrepl.h Xd, Rj, si11 | for i in range(16): XR[xd].h[i] = load 16 bit memory data from (GR[rj]+SignExtend(si11<<1))
+ XVMOVQ offset(Rj), Xd.W8 | xvldrepl.w Xd, Rj, si10 | for i in range(8) : XR[xd].w[i] = load 32 bit memory data from (GR[rj]+SignExtend(si10<<2))
+ XVMOVQ offset(Rj), Xd.V4 | xvldrepl.d Xd, Rj, si9 | for i in range(4) : XR[xd].d[i] = load 64 bit memory data from (GR[rj]+SignExtend(si9<<3))
+
# Special instruction encoding definition and description on LoongArch
1. DBAR hint encoding for LA664(Loongson 3A6000) and later micro-architectures, paraphrased