Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add {,x}vmadd series instructions support
author: Guoqi Chen <chenguoqi@loongson.cn>
Mon, 24 Nov 2025 12:19:06 +0000 (20:19 +0800)
committer: Gopher Robot <gobot@golang.org>
Wed, 26 Nov 2025 18:40:28 +0000 (10:40 -0800)
Go asm syntax:
  VMADD{B, H, W, V}                         V1, V2, V3
  VMSUB{B, H, W, V}                         V1, V2, V3
 XVMADD{B, H, W, V}                         X1, X2, X3
 XVMSUB{B, H, W, V}                         X1, X2, X3
 VMADDWEV{HB, WH, VW,QV}{,U}                V1, V2, V3
 VMADDWOD{HB, WH, VW,QV}{,U}                V1, V2, V3
XVMADDWEV{HB, WH, VW,QV}{,U}                X1, X2, X3
XVMADDWOD{HB, WH, VW,QV}{,U}                X1, X2, X3
 VMADDWEV{HBUB, WHUH, VWUW, QVUV}           V1, V2, V3
 VMADDWOD{HBUB, WHUH, VWUW, QVUV}           V1, V2, V3
XVMADDWEV{HBUB, WHUH, VWUW, QVUV}           X1, X2, X3
XVMADDWOD{HBUB, WHUH, VWUW, QVUV}           X1, X2, X3

Equivalent platform assembler syntax:
 vmadd.{b,h,w,d}                            v3, v2, v1
 vmsub.{b,h,w,d}                            v3, v2, v1
xvmadd.{b,h,w,d}                            x3, x2, x1
xvmsub.{b,h,w,d}                            x3, x2, x1
 vmaddwev.{h.b, w.h, d.w, q.d}{,u}          v3, v2, v1
 vmaddwod.{h.b, w.h, d.w, q.d}{,u}          v3, v2, v1
xvmaddwev.{h.b, w.h, d.w, q.d}{,u}          x3, x2, x1
xvmaddwod.{h.b, w.h, d.w, q.d}{,u}          x3, x2, x1
 vmaddwev.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  v3, v2, v1
 vmaddwod.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  v3, v2, v1
xvmaddwev.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  x3, x2, x1
xvmaddwod.{h.bu.b, w.hu.h, d.wu.w, q.du.d}  x3, x2, x1

Change-Id: I2f4aae51045e1596d4744e525a1589586065cf8e
Reviewed-on: https://go-review.googlesource.com/c/go/+/724200
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: abner chenc <chenguoqi@loongson.cn>

src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go

index 20fd01443406897a7ab85602767fa935764d42a6..42fa50583230fc89d64472504846408edaa15e77 100644 (file)
@@ -1163,6 +1163,78 @@ lable2:
        XVSUBWODVWU     X1, X2, X3      // 43043574
        XVSUBWODQVU     X1, X2, X3      // 43843574
 
+       // [X]VMADD.{B/H/W/D}, [X]VMSUB.{B/H/W/D} instructions
+       VMADDB          V1, V2, V3      // 4304a870
+       VMADDH          V1, V2, V3      // 4384a870
+       VMADDW          V1, V2, V3      // 4304a970
+       VMADDV          V1, V2, V3      // 4384a970
+       VMSUBB          V1, V2, V3      // 4304aa70
+       VMSUBH          V1, V2, V3      // 4384aa70
+       VMSUBW          V1, V2, V3      // 4304ab70
+       VMSUBV          V1, V2, V3      // 4384ab70
+       XVMADDB         X1, X2, X3      // 4304a874
+       XVMADDH         X1, X2, X3      // 4384a874
+       XVMADDW         X1, X2, X3      // 4304a974
+       XVMADDV         X1, X2, X3      // 4384a974
+       XVMSUBB         X1, X2, X3      // 4304aa74
+       XVMSUBH         X1, X2, X3      // 4384aa74
+       XVMSUBW         X1, X2, X3      // 4304ab74
+       XVMSUBV         X1, X2, X3      // 4384ab74
+
+       // [X]VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D} instructions
+       VMADDWEVHB      V1, V2, V3      // 4304ac70
+       VMADDWEVWH      V1, V2, V3      // 4384ac70
+       VMADDWEVVW      V1, V2, V3      // 4304ad70
+       VMADDWEVQV      V1, V2, V3      // 4384ad70
+       VMADDWODHB      V1, V2, V3      // 4304ae70
+       VMADDWODWH      V1, V2, V3      // 4384ae70
+       VMADDWODVW      V1, V2, V3      // 4304af70
+       VMADDWODQV      V1, V2, V3      // 4384af70
+       XVMADDWEVHB     X1, X2, X3      // 4304ac74
+       XVMADDWEVWH     X1, X2, X3      // 4384ac74
+       XVMADDWEVVW     X1, X2, X3      // 4304ad74
+       XVMADDWEVQV     X1, X2, X3      // 4384ad74
+       XVMADDWODHB     X1, X2, X3      // 4304ae74
+       XVMADDWODWH     X1, X2, X3      // 4384ae74
+       XVMADDWODVW     X1, X2, X3      // 4304af74
+       XVMADDWODQV     X1, X2, X3      // 4384af74
+
+       // [X]VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}U instructions
+       VMADDWEVHBU     V1, V2, V3      // 4304b470
+       VMADDWEVWHU     V1, V2, V3      // 4384b470
+       VMADDWEVVWU     V1, V2, V3      // 4304b570
+       VMADDWEVQVU     V1, V2, V3      // 4384b570
+       VMADDWODHBU     V1, V2, V3      // 4304b670
+       VMADDWODWHU     V1, V2, V3      // 4384b670
+       VMADDWODVWU     V1, V2, V3      // 4304b770
+       VMADDWODQVU     V1, V2, V3      // 4384b770
+       XVMADDWEVHBU    X1, X2, X3      // 4304b474
+       XVMADDWEVWHU    X1, X2, X3      // 4384b474
+       XVMADDWEVVWU    X1, X2, X3      // 4304b574
+       XVMADDWEVQVU    X1, X2, X3      // 4384b574
+       XVMADDWODHBU    X1, X2, X3      // 4304b674
+       XVMADDWODWHU    X1, X2, X3      // 4384b674
+       XVMADDWODVWU    X1, X2, X3      // 4304b774
+       XVMADDWODQVU    X1, X2, X3      // 4384b774
+
+       // [X]VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D} instructions
+       VMADDWEVHBUB    V1, V2, V3      // 4304bc70
+       VMADDWEVWHUH    V1, V2, V3      // 4384bc70
+       VMADDWEVVWUW    V1, V2, V3      // 4304bd70
+       VMADDWEVQVUV    V1, V2, V3      // 4384bd70
+       VMADDWODHBUB    V1, V2, V3      // 4304be70
+       VMADDWODWHUH    V1, V2, V3      // 4384be70
+       VMADDWODVWUW    V1, V2, V3      // 4304bf70
+       VMADDWODQVUV    V1, V2, V3      // 4384bf70
+       XVMADDWEVHBUB   X1, X2, X3      // 4304bc74
+       XVMADDWEVWHUH   X1, X2, X3      // 4384bc74
+       XVMADDWEVVWUW   X1, X2, X3      // 4304bd74
+       XVMADDWEVQVUV   X1, X2, X3      // 4384bd74
+       XVMADDWODHBUB   X1, X2, X3      // 4304be74
+       XVMADDWODWHUH   X1, X2, X3      // 4384be74
+       XVMADDWODVWUW   X1, X2, X3      // 4304bf74
+       XVMADDWODQVUV   X1, X2, X3      // 4384bf74
+
        // [X]VSHUF4I.{B/H/W/D} instructions
        VSHUF4IB        $0, V2, V1      // 41009073
        VSHUF4IB        $16, V2, V1     // 41409073
index 2458fb2e8e884349f665f0c52b5376cd459df8af..38d4b74959098c81a46e0d5977bea0e6a2ee3e12 100644 (file)
@@ -1227,6 +1227,78 @@ const (
        AXVSUBWODVWU
        AXVSUBWODQVU
 
+       AVMADDB
+       AVMADDH
+       AVMADDW
+       AVMADDV
+       AVMSUBB
+       AVMSUBH
+       AVMSUBW
+       AVMSUBV
+
+       AXVMADDB
+       AXVMADDH
+       AXVMADDW
+       AXVMADDV
+       AXVMSUBB
+       AXVMSUBH
+       AXVMSUBW
+       AXVMSUBV
+
+       AVMADDWEVHB
+       AVMADDWEVWH
+       AVMADDWEVVW
+       AVMADDWEVQV
+       AVMADDWODHB
+       AVMADDWODWH
+       AVMADDWODVW
+       AVMADDWODQV
+
+       AVMADDWEVHBU
+       AVMADDWEVWHU
+       AVMADDWEVVWU
+       AVMADDWEVQVU
+       AVMADDWODHBU
+       AVMADDWODWHU
+       AVMADDWODVWU
+       AVMADDWODQVU
+
+       AVMADDWEVHBUB
+       AVMADDWEVWHUH
+       AVMADDWEVVWUW
+       AVMADDWEVQVUV
+       AVMADDWODHBUB
+       AVMADDWODWHUH
+       AVMADDWODVWUW
+       AVMADDWODQVUV
+
+       AXVMADDWEVHB
+       AXVMADDWEVWH
+       AXVMADDWEVVW
+       AXVMADDWEVQV
+       AXVMADDWODHB
+       AXVMADDWODWH
+       AXVMADDWODVW
+       AXVMADDWODQV
+
+       AXVMADDWEVHBU
+       AXVMADDWEVWHU
+       AXVMADDWEVVWU
+       AXVMADDWEVQVU
+       AXVMADDWODHBU
+       AXVMADDWODWHU
+       AXVMADDWODVWU
+       AXVMADDWODQVU
+
+       AXVMADDWEVHBUB
+       AXVMADDWEVWHUH
+       AXVMADDWEVVWUW
+       AXVMADDWEVQVUV
+       AXVMADDWODHBUB
+       AXVMADDWODWHUH
+       AXVMADDWODVWUW
+       AXVMADDWODQVUV
+
        AVSHUF4IB
        AVSHUF4IH
        AVSHUF4IW
index 18f818cebaa63df88b9a33118e121cee49984222..b1fcbce196d61f5cb7f565e69a39b2599efacc03 100644 (file)
@@ -695,6 +695,70 @@ var Anames = []string{
        "XVSUBWODWHU",
        "XVSUBWODVWU",
        "XVSUBWODQVU",
+       "VMADDB",
+       "VMADDH",
+       "VMADDW",
+       "VMADDV",
+       "VMSUBB",
+       "VMSUBH",
+       "VMSUBW",
+       "VMSUBV",
+       "XVMADDB",
+       "XVMADDH",
+       "XVMADDW",
+       "XVMADDV",
+       "XVMSUBB",
+       "XVMSUBH",
+       "XVMSUBW",
+       "XVMSUBV",
+       "VMADDWEVHB",
+       "VMADDWEVWH",
+       "VMADDWEVVW",
+       "VMADDWEVQV",
+       "VMADDWODHB",
+       "VMADDWODWH",
+       "VMADDWODVW",
+       "VMADDWODQV",
+       "VMADDWEVHBU",
+       "VMADDWEVWHU",
+       "VMADDWEVVWU",
+       "VMADDWEVQVU",
+       "VMADDWODHBU",
+       "VMADDWODWHU",
+       "VMADDWODVWU",
+       "VMADDWODQVU",
+       "VMADDWEVHBUB",
+       "VMADDWEVWHUH",
+       "VMADDWEVVWUW",
+       "VMADDWEVQVUV",
+       "VMADDWODHBUB",
+       "VMADDWODWHUH",
+       "VMADDWODVWUW",
+       "VMADDWODQVUV",
+       "XVMADDWEVHB",
+       "XVMADDWEVWH",
+       "XVMADDWEVVW",
+       "XVMADDWEVQV",
+       "XVMADDWODHB",
+       "XVMADDWODWH",
+       "XVMADDWODVW",
+       "XVMADDWODQV",
+       "XVMADDWEVHBU",
+       "XVMADDWEVWHU",
+       "XVMADDWEVVWU",
+       "XVMADDWEVQVU",
+       "XVMADDWODHBU",
+       "XVMADDWODWHU",
+       "XVMADDWODVWU",
+       "XVMADDWODQVU",
+       "XVMADDWEVHBUB",
+       "XVMADDWEVWHUH",
+       "XVMADDWEVVWUW",
+       "XVMADDWEVQVUV",
+       "XVMADDWODHBUB",
+       "XVMADDWODWHUH",
+       "XVMADDWODVWUW",
+       "XVMADDWODQVUV",
        "VSHUF4IB",
        "VSHUF4IH",
        "VSHUF4IW",
index 6a2346009847fad09abaf2cb5e4f4085737645a7..e5f2014e956367ea39d19df3aa1f2ae36527d1e1 100644 (file)
@@ -1830,6 +1830,38 @@ func buildop(ctxt *obj.Link) {
                        opset(AVSUBWODWHU, r0)
                        opset(AVSUBWODVWU, r0)
                        opset(AVSUBWODQVU, r0)
+                       opset(AVMADDB, r0)
+                       opset(AVMADDH, r0)
+                       opset(AVMADDW, r0)
+                       opset(AVMADDV, r0)
+                       opset(AVMSUBB, r0)
+                       opset(AVMSUBH, r0)
+                       opset(AVMSUBW, r0)
+                       opset(AVMSUBV, r0)
+                       opset(AVMADDWEVHB, r0)
+                       opset(AVMADDWEVWH, r0)
+                       opset(AVMADDWEVVW, r0)
+                       opset(AVMADDWEVQV, r0)
+                       opset(AVMADDWODHB, r0)
+                       opset(AVMADDWODWH, r0)
+                       opset(AVMADDWODVW, r0)
+                       opset(AVMADDWODQV, r0)
+                       opset(AVMADDWEVHBU, r0)
+                       opset(AVMADDWEVWHU, r0)
+                       opset(AVMADDWEVVWU, r0)
+                       opset(AVMADDWEVQVU, r0)
+                       opset(AVMADDWODHBU, r0)
+                       opset(AVMADDWODWHU, r0)
+                       opset(AVMADDWODVWU, r0)
+                       opset(AVMADDWODQVU, r0)
+                       opset(AVMADDWEVHBUB, r0)
+                       opset(AVMADDWEVWHUH, r0)
+                       opset(AVMADDWEVVWUW, r0)
+                       opset(AVMADDWEVQVUV, r0)
+                       opset(AVMADDWODHBUB, r0)
+                       opset(AVMADDWODWHUH, r0)
+                       opset(AVMADDWODVWUW, r0)
+                       opset(AVMADDWODQVUV, r0)
 
                case AXVSLTB:
                        opset(AXVSLTH, r0)
@@ -1871,6 +1903,38 @@ func buildop(ctxt *obj.Link) {
                        opset(AXVSUBWODWHU, r0)
                        opset(AXVSUBWODVWU, r0)
                        opset(AXVSUBWODQVU, r0)
+                       opset(AXVMADDB, r0)
+                       opset(AXVMADDH, r0)
+                       opset(AXVMADDW, r0)
+                       opset(AXVMADDV, r0)
+                       opset(AXVMSUBB, r0)
+                       opset(AXVMSUBH, r0)
+                       opset(AXVMSUBW, r0)
+                       opset(AXVMSUBV, r0)
+                       opset(AXVMADDWEVHB, r0)
+                       opset(AXVMADDWEVWH, r0)
+                       opset(AXVMADDWEVVW, r0)
+                       opset(AXVMADDWEVQV, r0)
+                       opset(AXVMADDWODHB, r0)
+                       opset(AXVMADDWODWH, r0)
+                       opset(AXVMADDWODVW, r0)
+                       opset(AXVMADDWODQV, r0)
+                       opset(AXVMADDWEVHBU, r0)
+                       opset(AXVMADDWEVWHU, r0)
+                       opset(AXVMADDWEVVWU, r0)
+                       opset(AXVMADDWEVQVU, r0)
+                       opset(AXVMADDWODHBU, r0)
+                       opset(AXVMADDWODWHU, r0)
+                       opset(AXVMADDWODVWU, r0)
+                       opset(AXVMADDWODQVU, r0)
+                       opset(AXVMADDWEVHBUB, r0)
+                       opset(AXVMADDWEVWHUH, r0)
+                       opset(AXVMADDWEVVWUW, r0)
+                       opset(AXVMADDWEVQVUV, r0)
+                       opset(AXVMADDWODHBUB, r0)
+                       opset(AXVMADDWODWHUH, r0)
+                       opset(AXVMADDWODVWUW, r0)
+                       opset(AXVMADDWODQVUV, r0)
 
                case AVANDB:
                        opset(AVORB, r0)
@@ -3811,6 +3875,134 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
                return 0x0E86A << 15 // xvsubwod.d.wu
        case AXVSUBWODQVU:
                return 0x0E86B << 15 // xvsubwod.q.du
+       case AVMADDB:
+               return 0x0E150 << 15 // vmadd.b
+       case AVMADDH:
+               return 0x0E151 << 15 // vmadd.h
+       case AVMADDW:
+               return 0x0E152 << 15 // vmadd.w
+       case AVMADDV:
+               return 0x0E153 << 15 // vmadd.d
+       case AVMSUBB:
+               return 0x0E154 << 15 // vmsub.b
+       case AVMSUBH:
+               return 0x0E155 << 15 // vmsub.h
+       case AVMSUBW:
+               return 0x0E156 << 15 // vmsub.w
+       case AVMSUBV:
+               return 0x0E157 << 15 // vmsub.d
+       case AXVMADDB:
+               return 0x0E950 << 15 // xvmadd.b
+       case AXVMADDH:
+               return 0x0E951 << 15 // xvmadd.h
+       case AXVMADDW:
+               return 0x0E952 << 15 // xvmadd.w
+       case AXVMADDV:
+               return 0x0E953 << 15 // xvmadd.d
+       case AXVMSUBB:
+               return 0x0E954 << 15 // xvmsub.b
+       case AXVMSUBH:
+               return 0x0E955 << 15 // xvmsub.h
+       case AXVMSUBW:
+               return 0x0E956 << 15 // xvmsub.w
+       case AXVMSUBV:
+               return 0x0E957 << 15 // xvmsub.d
+       case AVMADDWEVHB:
+               return 0x0E158 << 15 // vmaddwev.h.b
+       case AVMADDWEVWH:
+               return 0x0E159 << 15 // vmaddwev.w.h
+       case AVMADDWEVVW:
+               return 0x0E15A << 15 // vmaddwev.d.w
+       case AVMADDWEVQV:
+               return 0x0E15B << 15 // vmaddwev.q.d
+       case AVMADDWODHB:
+               return 0x0E15C << 15 // vmaddwod.h.b
+       case AVMADDWODWH:
+               return 0x0E15D << 15 // vmaddwod.w.h
+       case AVMADDWODVW:
+               return 0x0E15E << 15 // vmaddwod.d.w
+       case AVMADDWODQV:
+               return 0x0E15F << 15 // vmaddwod.q.d
+       case AVMADDWEVHBU:
+               return 0x0E168 << 15 // vmaddwev.h.bu
+       case AVMADDWEVWHU:
+               return 0x0E169 << 15 // vmaddwev.w.hu
+       case AVMADDWEVVWU:
+               return 0x0E16A << 15 // vmaddwev.d.wu
+       case AVMADDWEVQVU:
+               return 0x0E16B << 15 // vmaddwev.q.du
+       case AVMADDWODHBU:
+               return 0x0E16C << 15 // vmaddwod.h.bu
+       case AVMADDWODWHU:
+               return 0x0E16D << 15 // vmaddwod.w.hu
+       case AVMADDWODVWU:
+               return 0x0E16E << 15 // vmaddwod.d.wu
+       case AVMADDWODQVU:
+               return 0x0E16F << 15 // vmaddwod.q.du
+       case AVMADDWEVHBUB:
+               return 0x0E178 << 15 // vmaddwev.h.bu.b
+       case AVMADDWEVWHUH:
+               return 0x0E179 << 15 // vmaddwev.w.hu.h
+       case AVMADDWEVVWUW:
+               return 0x0E17A << 15 // vmaddwev.d.wu.w
+       case AVMADDWEVQVUV:
+               return 0x0E17B << 15 // vmaddwev.q.du.d
+       case AVMADDWODHBUB:
+               return 0x0E17C << 15 // vmaddwod.h.bu.b
+       case AVMADDWODWHUH:
+               return 0x0E17D << 15 // vmaddwod.w.hu.h
+       case AVMADDWODVWUW:
+               return 0x0E17E << 15 // vmaddwod.d.wu.w
+       case AVMADDWODQVUV:
+               return 0x0E17F << 15 // vmaddwod.q.du.d
+       case AXVMADDWEVHB:
+               return 0x0E958 << 15 // xvmaddwev.h.b
+       case AXVMADDWEVWH:
+               return 0x0E959 << 15 // xvmaddwev.w.h
+       case AXVMADDWEVVW:
+               return 0x0E95A << 15 // xvmaddwev.d.w
+       case AXVMADDWEVQV:
+               return 0x0E95B << 15 // xvmaddwev.q.d
+       case AXVMADDWODHB:
+               return 0x0E95C << 15 // xvmaddwod.h.b
+       case AXVMADDWODWH:
+               return 0x0E95D << 15 // xvmaddwod.w.h
+       case AXVMADDWODVW:
+               return 0x0E95E << 15 // xvmaddwod.d.w
+       case AXVMADDWODQV:
+               return 0x0E95F << 15 // xvmaddwod.q.d
+       case AXVMADDWEVHBU:
+               return 0x0E968 << 15 // xvmaddwev.h.bu
+       case AXVMADDWEVWHU:
+               return 0x0E969 << 15 // xvmaddwev.w.hu
+       case AXVMADDWEVVWU:
+               return 0x0E96A << 15 // xvmaddwev.d.wu
+       case AXVMADDWEVQVU:
+               return 0x0E96B << 15 // xvmaddwev.q.du
+       case AXVMADDWODHBU:
+               return 0x0E96C << 15 // xvmaddwod.h.bu
+       case AXVMADDWODWHU:
+               return 0x0E96D << 15 // xvmaddwod.w.hu
+       case AXVMADDWODVWU:
+               return 0x0E96E << 15 // xvmaddwod.d.wu
+       case AXVMADDWODQVU:
+               return 0x0E96F << 15 // xvmaddwod.q.du
+       case AXVMADDWEVHBUB:
+               return 0x0E978 << 15 // xvmaddwev.h.bu.b
+       case AXVMADDWEVWHUH:
+               return 0x0E979 << 15 // xvmaddwev.w.hu.h
+       case AXVMADDWEVVWUW:
+               return 0x0E97A << 15 // xvmaddwev.d.wu.w
+       case AXVMADDWEVQVUV:
+               return 0x0E97B << 15 // xvmaddwev.q.du.d
+       case AXVMADDWODHBUB:
+               return 0x0E97C << 15 // xvmaddwod.h.bu.b
+       case AXVMADDWODWHUH:
+               return 0x0E97D << 15 // xvmaddwod.w.hu.h
+       case AXVMADDWODVWUW:
+               return 0x0E97E << 15 // xvmaddwod.d.wu.w
+       case AXVMADDWODQVUV:
+               return 0x0E97F << 15 // xvmaddwod.q.du.d
        case AVSLLB:
                return 0xe1d0 << 15 // vsll.b
        case AVSLLH: