Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add VPERMI.W, XVPERMI.{W,D,Q} instruction support
author Guoqi Chen <chenguoqi@loongson.cn>
Wed, 29 Oct 2025 07:43:54 +0000 (15:43 +0800)
committer abner chenc <chenguoqi@loongson.cn>
Tue, 4 Nov 2025 01:16:43 +0000 (17:16 -0800)
Go asm syntax:
 VPERMIW        $0x1b, vj, vd
XVPERMI{W,V,Q}  $0x1b, xj, xd

Equivalent platform assembler syntax:
 vpermi.w       vd, vj, $0x1b
xvpermi.{w,d,q} xd, xj, $0x1b

Change-Id: Ie23b2fdd09b4c93801dc804913206f1c5a496268
Reviewed-on: https://go-review.googlesource.com/c/go/+/716800
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go
src/cmd/internal/obj/loong64/doc.go

index 6e2a86969d54cbf055753adbea3aff9e1d21c3f1..ca86ef8e6c1881649902bba0219017cbf31f0289 100644 (file)
@@ -1017,6 +1017,12 @@ lable2:
        XVSHUF4IV       $8, X1, X2      // 22209c77
        XVSHUF4IV       $15, X1, X2     // 223c9c77
 
+       // VPERMIW, XVPERMI{W,V,Q} instructions
+       VPERMIW         $0x1B, V1, V2   // VPERMIW      $27, V1, V2 // 226ce473
+       XVPERMIW        $0x2B, X1, X2   // XVPERMIW     $43, X1, X2 // 22ace477
+       XVPERMIV        $0x3B, X1, X2   // XVPERMIV     $59, X1, X2 // 22ece877
+       XVPERMIQ        $0x4B, X1, X2   // XVPERMIQ     $75, X1, X2 // 222ced77
+
        // [X]VSETEQZ.V, [X]VSETNEZ.V
        VSETEQV         V1, FCC0        // 20989c72
        VSETNEV         V1, FCC0        // 209c9c72
index 3a676db922ca71ea38a5e362043a7916232cd0bd..762dc338e3e149e719ba99fe3cadee5aec77d8f6 100644 (file)
@@ -1115,6 +1115,11 @@ const (
        AXVSHUF4IW
        AXVSHUF4IV
 
+       AVPERMIW
+       AXVPERMIW
+       AXVPERMIV
+       AXVPERMIQ
+
        AVSETEQV
        AVSETNEV
        AVSETANYEQB
index 422ccbd9b0bc0af323b3b642f8d06776a2650545..607e6063110a3c9d0cc3a3056a619359a5c28733 100644 (file)
@@ -586,6 +586,10 @@ var Anames = []string{
        "XVSHUF4IH",
        "XVSHUF4IW",
        "XVSHUF4IV",
+       "VPERMIW",
+       "XVPERMIW",
+       "XVPERMIV",
+       "XVPERMIQ",
        "VSETEQV",
        "VSETNEV",
        "VSETANYEQB",
index 7eb5668d82e2316c4864d5b0f955b11706377623..8e2393bc1cdcd57ac70714d7db39946095b01b0e 100644 (file)
@@ -1778,6 +1778,7 @@ func buildop(ctxt *obj.Link) {
                        opset(AVSHUF4IH, r0)
                        opset(AVSHUF4IW, r0)
                        opset(AVSHUF4IV, r0)
+                       opset(AVPERMIW, r0)
 
                case AXVANDB:
                        opset(AXVORB, r0)
@@ -1787,6 +1788,9 @@ func buildop(ctxt *obj.Link) {
                        opset(AXVSHUF4IH, r0)
                        opset(AXVSHUF4IW, r0)
                        opset(AXVSHUF4IV, r0)
+                       opset(AXVPERMIW, r0)
+                       opset(AXVPERMIV, r0)
+                       opset(AXVPERMIQ, r0)
 
                case AVANDV:
                        opset(AVORV, r0)
@@ -4362,6 +4366,14 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
                return 0x1de6 << 18 // xvshuf4i.w
        case AXVSHUF4IV:
                return 0x1de7 << 18 // xvshuf4i.d
+       case AVPERMIW:
+               return 0x1cf9 << 18 // vpermi.w
+       case AXVPERMIW:
+               return 0x1df9 << 18 // xvpermi.w
+       case AXVPERMIV:
+               return 0x1dfa << 18 // xvpermi.d
+       case AXVPERMIQ:
+               return 0x1dfb << 18 // xvpermi.q
        case AVBITCLRB:
                return 0x1CC4<<18 | 0x1<<13 // vbitclri.b
        case AVBITCLRH:
index f7e5a4fb4279eabcf6453e80aea4f10c897811b6..45f75e6e7037804792271266f295d3c5576e146e 100644 (file)
@@ -229,6 +229,23 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate)
          VMOVQ  8(R4), V5.W4     |      vldrepl.w  v5, r4, $2
          VMOVQ  8(R4), V5.V2     |      vldrepl.d  v5, r4, $1
 
+3.8 Vector permutation instructions
+       Instruction format:
+       VPERMIW    ui8, Vj, Vd
+
+       Mapping between Go and platform assembly:
+            Go assembly     |   platform assembly   |                                 semantics
+       VPERMIW  ui8, Vj, Vd |  vpermi.w vd, vj, ui8 | VR[vd].W[0] = VR[vj].W[ui8[1:0]], VR[vd].W[1] = VR[vj].W[ui8[3:2]],
+                            |                       | VR[vd].W[2] = VR[vd].W[ui8[5:4]], VR[vd].W[3] = VR[vd].W[ui8[7:6]]
+       XVPERMIW ui8, Xj, Xd | xvpermi.w xd, xj, ui8 | XR[xd].W[0] = XR[xj].W[ui8[1:0]],   XR[xd].W[1] = XR[xj].W[ui8[3:2]],
+                            |                       | XR[xd].W[2] = XR[xd].W[ui8[5:4]],   XR[xd].W[3] = XR[xd].W[ui8[7:6]],
+                            |                       | XR[xd].W[4] = XR[xj].W[ui8[1:0]+4], XR[xd].W[5] = XR[xj].W[ui8[3:2]+4],
+                            |                       | XR[xd].W[6] = XR[xd].W[ui8[5:4]+4], XR[xd].W[7] = XR[xd].W[ui8[7:6]+4]
+       XVPERMIV ui8, Xj, Xd | xvpermi.d xd, xj, ui8 | XR[xd].D[0] = XR[xj].D[ui8[1:0]], XR[xd].D[1] = XR[xj].D[ui8[3:2]],
+                            |                       | XR[xd].D[2] = XR[xj].D[ui8[5:4]], XR[xd].D[3] = XR[xj].D[ui8[7:6]]
+       XVPERMIQ ui8, Xj, Xd | xvpermi.q xd, xj, ui8 | vec = {XR[xd], XR[xj]}, XR[xd].Q[0] = vec.Q[ui8[1:0]], XR[xd].Q[1] = vec.Q[ui8[5:4]]
+
+
 # Special instruction encoding definition and description on LoongArch
 
  1. DBAR hint encoding for LA664(Loongson 3A6000) and later micro-architectures, paraphrased