XVSHUF4IV $8, X1, X2 // 22209c77
XVSHUF4IV $15, X1, X2 // 223c9c77
+ // VPERMIW, XVPERMI{W,V,Q} instructions
+ VPERMIW $0x1B, V1, V2 // VPERMIW $27, V1, V2 // 226ce473
+ XVPERMIW $0x2B, X1, X2 // XVPERMIW $43, X1, X2 // 22ace477
+ XVPERMIV $0x3B, X1, X2 // XVPERMIV $59, X1, X2 // 22ece877
+ XVPERMIQ $0x4B, X1, X2 // XVPERMIQ $75, X1, X2 // 222ced77
+
// [X]VSETEQZ.V, [X]VSETNEZ.V
VSETEQV V1, FCC0 // 20989c72
VSETNEV V1, FCC0 // 209c9c72
opset(AVSHUF4IH, r0)
opset(AVSHUF4IW, r0)
opset(AVSHUF4IV, r0)
+ opset(AVPERMIW, r0)
case AXVANDB:
opset(AXVORB, r0)
opset(AXVSHUF4IH, r0)
opset(AXVSHUF4IW, r0)
opset(AXVSHUF4IV, r0)
+ opset(AXVPERMIW, r0)
+ opset(AXVPERMIV, r0)
+ opset(AXVPERMIQ, r0)
case AVANDV:
opset(AVORV, r0)
return 0x1de6 << 18 // xvshuf4i.w
case AXVSHUF4IV:
return 0x1de7 << 18 // xvshuf4i.d
+ case AVPERMIW:
+ return 0x1cf9 << 18 // vpermi.w
+ case AXVPERMIW:
+ return 0x1df9 << 18 // xvpermi.w
+ case AXVPERMIV:
+ return 0x1dfa << 18 // xvpermi.d
+ case AXVPERMIQ:
+ return 0x1dfb << 18 // xvpermi.q
case AVBITCLRB:
return 0x1CC4<<18 | 0x1<<13 // vbitclri.b
case AVBITCLRH:
VMOVQ 8(R4), V5.W4 | vldrepl.w v5, r4, $2
VMOVQ 8(R4), V5.V2 | vldrepl.d v5, r4, $1
+3.8 Vector permutation instructions
+ Instruction format:
+ VPERMIW ui8, Vj, Vd
+ XVPERMIW ui8, Xj, Xd
+ XVPERMIV ui8, Xj, Xd
+ XVPERMIQ ui8, Xj, Xd
+
+ Mapping between Go and platform assembly:
+ Go assembly | platform assembly | semantics
+ VPERMIW ui8, Vj, Vd | vpermi.w vd, vj, ui8 | VR[vd].W[0] = VR[vj].W[ui8[1:0]], VR[vd].W[1] = VR[vj].W[ui8[3:2]],
+ | | VR[vd].W[2] = VR[vd].W[ui8[5:4]], VR[vd].W[3] = VR[vd].W[ui8[7:6]]
+ XVPERMIW ui8, Xj, Xd | xvpermi.w xd, xj, ui8 | XR[xd].W[0] = XR[xj].W[ui8[1:0]], XR[xd].W[1] = XR[xj].W[ui8[3:2]],
+ | | XR[xd].W[2] = XR[xd].W[ui8[5:4]], XR[xd].W[3] = XR[xd].W[ui8[7:6]],
+ | | XR[xd].W[4] = XR[xj].W[ui8[1:0]+4], XR[xd].W[5] = XR[xj].W[ui8[3:2]+4],
+ | | XR[xd].W[6] = XR[xd].W[ui8[5:4]+4], XR[xd].W[7] = XR[xd].W[ui8[7:6]+4]
+ XVPERMIV ui8, Xj, Xd | xvpermi.d xd, xj, ui8 | XR[xd].D[0] = XR[xj].D[ui8[1:0]], XR[xd].D[1] = XR[xj].D[ui8[3:2]],
+ | | XR[xd].D[2] = XR[xj].D[ui8[5:4]], XR[xd].D[3] = XR[xj].D[ui8[7:6]]
+ XVPERMIQ ui8, Xj, Xd | xvpermi.q xd, xj, ui8 | vec = {XR[xd], XR[xj]}, XR[xd].Q[0] = vec.Q[ui8[1:0]], XR[xd].Q[1] = vec.Q[ui8[5:4]]
+
+
# Special instruction encoding definition and description on LoongArch
1. DBAR hint encoding for LA664(Loongson 3A6000) and later micro-architectures, paraphrased