]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add {,X}VEXTRINS.{B,H,W,V} instruction support
author Guoqi Chen <chenguoqi@loongson.cn>
Thu, 30 Oct 2025 12:27:04 +0000 (20:27 +0800)
committer abner chenc <chenguoqi@loongson.cn>
Thu, 6 Nov 2025 00:43:11 +0000 (16:43 -0800)
Go asm syntax:
 VEXTRINS{B,H,W,V} $0x1b, Vj, Vd
XVEXTRINS{B,H,W,V} $0x1b, Xj, Xd

Equivalent platform assembler syntax:
         vextrins.{b,h,w,d}     vd, vj, $0x1b
        xvextrins.{b,h,w,d}     xd, xj, $0x1b

Change-Id: Ibc0bf926befaa2f810cfedd9a40f7ad9a6a9d7fc
Reviewed-on: https://go-review.googlesource.com/c/go/+/716803
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go
src/cmd/internal/obj/loong64/doc.go

index e0619f8ecddd4cdd0dbea65b51b92c62cd3677c9..b440a6456fcb3bbfb04582abe46eac7a63d632cd 100644 (file)
@@ -1029,6 +1029,16 @@ lable2:
        XVPERMIV        $0x3B, X1, X2   // XVPERMIV     $59, X1, X2 // 22ece877
        XVPERMIQ        $0x4B, X1, X2   // XVPERMIQ     $75, X1, X2 // 222ced77
 
+       // A{,X}VEXTRINS.{B,H,W,V} instructions
+       VEXTRINSB       $0x18, V1, V2   // VEXTRINSB    $24, V1, V2 // 22608c73
+       VEXTRINSH       $0x27, V1, V2   // VEXTRINSH    $39, V1, V2 // 229c8873
+       VEXTRINSW       $0x36, V1, V2   // VEXTRINSW    $54, V1, V2 // 22d88473
+       VEXTRINSV       $0x45, V1, V2   // VEXTRINSV    $69, V1, V2 // 22148173
+       XVEXTRINSB      $0x54, X1, X2   // XVEXTRINSB   $84, X1, X2 // 22508d77
+       XVEXTRINSH      $0x63, X1, X2   // XVEXTRINSH   $99, X1, X2 // 228c8977
+       XVEXTRINSW      $0x72, X1, X2   // XVEXTRINSW   $114, X1, X2 // 22c88577
+       XVEXTRINSV      $0x81, X1, X2   // XVEXTRINSV   $129, X1, X2 // 22048277
+
        // [X]VSETEQZ.V, [X]VSETNEZ.V
        VSETEQV         V1, FCC0        // 20989c72
        VSETNEV         V1, FCC0        // 209c9c72
index 762dc338e3e149e719ba99fe3cadee5aec77d8f6..48e60b64bf37a3563408e363f3b6ede4e7dd9dfd 100644 (file)
@@ -1120,6 +1120,15 @@ const (
        AXVPERMIV
        AXVPERMIQ
 
+       AVEXTRINSB
+       AVEXTRINSH
+       AVEXTRINSW
+       AVEXTRINSV
+       AXVEXTRINSB
+       AXVEXTRINSH
+       AXVEXTRINSW
+       AXVEXTRINSV
+
        AVSETEQV
        AVSETNEV
        AVSETANYEQB
index 607e6063110a3c9d0cc3a3056a619359a5c28733..95806741e6ee722e74c627dce4c5a7f1deaa1b3a 100644 (file)
@@ -590,6 +590,14 @@ var Anames = []string{
        "XVPERMIW",
        "XVPERMIV",
        "XVPERMIQ",
+       "VEXTRINSB",
+       "VEXTRINSH",
+       "VEXTRINSW",
+       "VEXTRINSV",
+       "XVEXTRINSB",
+       "XVEXTRINSH",
+       "XVEXTRINSW",
+       "XVEXTRINSV",
        "VSETEQV",
        "VSETNEV",
        "VSETANYEQB",
index 87691838861c3de0cbd797d09567eed1b467278a..8bf8a1d99011e16d68e07d7c27d5d779a7c89882 100644 (file)
@@ -1781,6 +1781,10 @@ func buildop(ctxt *obj.Link) {
                        opset(AVSHUF4IW, r0)
                        opset(AVSHUF4IV, r0)
                        opset(AVPERMIW, r0)
+                       opset(AVEXTRINSB, r0)
+                       opset(AVEXTRINSH, r0)
+                       opset(AVEXTRINSW, r0)
+                       opset(AVEXTRINSV, r0)
 
                case AXVANDB:
                        opset(AXVORB, r0)
@@ -1793,6 +1797,10 @@ func buildop(ctxt *obj.Link) {
                        opset(AXVPERMIW, r0)
                        opset(AXVPERMIV, r0)
                        opset(AXVPERMIQ, r0)
+                       opset(AXVEXTRINSB, r0)
+                       opset(AXVEXTRINSH, r0)
+                       opset(AXVEXTRINSW, r0)
+                       opset(AXVEXTRINSV, r0)
 
                case AVANDV:
                        opset(AVORV, r0)
@@ -4383,6 +4391,22 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
                return 0x1dfa << 18 // xvpermi.d
        case AXVPERMIQ:
                return 0x1dfb << 18 // xvpermi.q
+       case AVEXTRINSB:
+               return 0x1ce3 << 18 // vextrins.b
+       case AVEXTRINSH:
+               return 0x1ce2 << 18 // vextrins.h
+       case AVEXTRINSW:
+               return 0x1ce1 << 18 // vextrins.w
+       case AVEXTRINSV:
+               return 0x1ce0 << 18 // vextrins.d
+       case AXVEXTRINSB:
+               return 0x1de3 << 18 // xvextrins.b
+       case AXVEXTRINSH:
+               return 0x1de2 << 18 // xvextrins.h
+       case AXVEXTRINSW:
+               return 0x1de1 << 18 // xvextrins.w
+       case AXVEXTRINSV:
+               return 0x1de0 << 18 // xvextrins.d
        case AVBITCLRB:
                return 0x1CC4<<18 | 0x1<<13 // vbitclri.b
        case AVBITCLRH:
index c96501ea81b990f689b49325cf1b30add1cf8741..19c9e05590c4f86f6e68b11e0fd45b2db157e059 100644 (file)
@@ -254,6 +254,23 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate)
                             |                       | XR[xd].D[2] = XR[xj].D[ui8[5:4]], XR[xd].D[3] = XR[xj].D[ui8[7:6]]
        XVPERMIQ ui8, Xj, Xd | xvpermi.q xd, xj, ui8 | vec = {XR[xd], XR[xj]}, XR[xd].Q[0] = vec.Q[ui8[1:0]], XR[xd].Q[1] = vec.Q[ui8[5:4]]
 
+3.9 Vector misc instruction
+
+3.9.1 {,X}VEXTRINS.{B,H,W,V}
+
+       Instruction format:
+       VEXTRINSB   ui8, Vj, Vd
+
+       Mapping between Go and platform assembly:
+             Go assembly      |    platform assembly    |             semantics
+        VEXTRINSB ui8, Vj, Vd |  vextrins.b vd, vj, ui8 | VR[vd].B[ui8[7:4]] = VR[vj].B[ui8[3:0]]
+        VEXTRINSH ui8, Vj, Vd |  vextrins.h vd, vj, ui8 | VR[vd].H[ui8[6:4]] = VR[vj].H[ui8[2:0]]
+        VEXTRINSW ui8, Vj, Vd |  vextrins.w vd, vj, ui8 | VR[vd].W[ui8[5:4]] = VR[vj].W[ui8[1:0]]
+        VEXTRINSV ui8, Vj, Vd |  vextrins.d vd, vj, ui8 | VR[vd].D[ui8[4]] = VR[vj].D[ui8[0]]
+       XVEXTRINSB ui8, Xj, Xd | xvextrins.b xd, xj, ui8 | XR[xd].B[ui8[7:4]] = XR[xj].B[ui8[3:0]], XR[xd].B[ui8[7:4]+16] = XR[xj].B[ui8[3:0]+16]
+       XVEXTRINSH ui8, Xj, Xd | xvextrins.h xd, xj, ui8 | XR[xd].H[ui8[6:4]] = XR[xj].H[ui8[2:0]], XR[xd].H[ui8[6:4]+8] = XR[xj].H[ui8[2:0]+8]
+       XVEXTRINSW ui8, Xj, Xd | xvextrins.w xd, xj, ui8 | XR[xd].W[ui8[5:4]] = XR[xj].W[ui8[1:0]], XR[xd].W[ui8[5:4]+4] = XR[xj].W[ui8[1:0]+4]
+       XVEXTRINSV ui8, Xj, Xd | xvextrins.d xd, xj, ui8 | XR[xd].D[ui8[4]] = XR[xj].D[ui8[0]], XR[xd].D[ui8[4]+2] = XR[xj].D[ui8[0]+2]
 
 # Special instruction encoding definition and description on LoongArch