]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add {V,XV}{ADD/SUB}.{B,H,W,D,Q} and {V,XV}{ADD/SUB}.{B...
authorXiaolin Zhao <zhaoxiaolin@loongson.cn>
Thu, 19 Dec 2024 13:07:42 +0000 (21:07 +0800)
committerabner chenc <chenguoqi@loongson.cn>
Thu, 6 Mar 2025 02:29:37 +0000 (18:29 -0800)
Go asm syntax:
 V{ADD/SUB}{B,H,W,V,Q} VK, VJ, VD
XV{ADD/SUB}{B,H,W,V,Q} XK, XJ, XD
 V{ADD/SUB}{B,H,W,V}U $1, VJ, VD
XV{ADD/SUB}{B,H,W,V}U $1, XJ, XD

Equivalent platform assembler syntax:
 v{add/sub}.{b,h,w,d,q} vd, vj, vk
xv{add/sub}.{b,h,w,d,q} xd, xj, xk
 v{add/sub}i.{b,h,w,d}u vd, vj, $1
xv{add/sub}i.{b,h,w,d}u xd, xj, $1

Change-Id: Ia1ef0bc062f4403bb0b1514c2cf1c0264f5d22ee
Reviewed-on: https://go-review.googlesource.com/c/go/+/637795
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go

index 3a8b8b8e5a5f122c5d4777e3d0ea683468f814a9..c3d1908e75d36ee3f3c024741f168ce55ceb2da7 100644 (file)
@@ -679,3 +679,43 @@ lable2:
        XVROTRV         $0, X2, X1      // 4100a176
        XVROTRV         $63, X2, X1     // 41fca176
        XVROTRV         $52, X2         // 42d0a176
+
+       // [X]VADD{B,H,W,V,Q}, [X]VSUB{B,H,W,V,Q} instructions
+       VADDB           V1, V2, V3      // 43040a70
+       VADDH           V1, V2, V3      // 43840a70
+       VADDW           V1, V2, V3      // 43040b70
+       VADDV           V1, V2, V3      // 43840b70
+       VADDQ           V1, V2, V3      // 43042d71
+       VSUBB           V1, V2, V3      // 43040c70
+       VSUBH           V1, V2, V3      // 43840c70
+       VSUBW           V1, V2, V3      // 43040d70
+       VSUBV           V1, V2, V3      // 43840d70
+       VSUBQ           V1, V2, V3      // 43842d71
+       XVADDB          X3, X2, X1      // 410c0a74
+       XVADDH          X3, X2, X1      // 418c0a74
+       XVADDW          X3, X2, X1      // 410c0b74
+       XVADDV          X3, X2, X1      // 418c0b74
+       XVADDQ          X3, X2, X1      // 410c2d75
+       XVSUBB          X3, X2, X1      // 410c0c74
+       XVSUBH          X3, X2, X1      // 418c0c74
+       XVSUBW          X3, X2, X1      // 410c0d74
+       XVSUBV          X3, X2, X1      // 418c0d74
+       XVSUBQ          X3, X2, X1      // 418c2d75
+
+       // [X]VADD{B,H,W,V}U, [X]VSUB{B,H,W,V}U instructions
+       VADDBU          $1, V2, V1      // 41048a72
+       VADDHU          $2, V2, V1      // 41888a72
+       VADDWU          $3, V2, V1      // 410c8b72
+       VADDVU          $4, V2, V1      // 41908b72
+       VSUBBU          $5, V2, V1      // 41148c72
+       VSUBHU          $6, V2, V1      // 41988c72
+       VSUBWU          $7, V2, V1      // 411c8d72
+       VSUBVU          $8, V2, V1      // 41a08d72
+       XVADDBU         $9, X1, X2      // 22248a76
+       XVADDHU         $10, X1, X2     // 22a88a76
+       XVADDWU         $11, X1, X2     // 222c8b76
+       XVADDVU         $12, X1, X2     // 22b08b76
+       XVSUBBU         $13, X1, X2     // 22348c76
+       XVSUBHU         $14, X1, X2     // 22b88c76
+       XVSUBWU         $15, X1, X2     // 223c8d76
+       XVSUBVU         $16, X1, X2     // 22c08d76
index 842c800bf5eae8353262744b51b87f2603fa0d9f..2bc895b880acec760d99f8548fc20b3b7c5d02a1 100644 (file)
@@ -745,6 +745,44 @@ const (
        AVMOVQ
        AXVMOVQ
 
+       // LSX and LASX arithmetic instructions
+       AVADDB
+       AVADDH
+       AVADDW
+       AVADDV
+       AVADDQ
+       AXVADDB
+       AXVADDH
+       AXVADDW
+       AXVADDV
+       AXVADDQ
+       AVSUBB
+       AVSUBH
+       AVSUBW
+       AVSUBV
+       AVSUBQ
+       AXVSUBB
+       AXVSUBH
+       AXVSUBW
+       AXVSUBV
+       AXVSUBQ
+       AVADDBU
+       AVADDHU
+       AVADDWU
+       AVADDVU
+       AVSUBBU
+       AVSUBHU
+       AVSUBWU
+       AVSUBVU
+       AXVADDBU
+       AXVADDHU
+       AXVADDWU
+       AXVADDVU
+       AXVSUBBU
+       AXVSUBHU
+       AXVSUBWU
+       AXVSUBVU
+
        // LSX and LASX Bit-manipulation Instructions
        AVPCNTB
        AVPCNTH
index 82c38dde1a9c4e5ebac1bb07b3eea9e8c6b69b83..7201f7d961c7da01867a7524e9b72080aa2a89b7 100644 (file)
@@ -261,6 +261,42 @@ var Anames = []string{
        "FTINTRNEVD",
        "VMOVQ",
        "XVMOVQ",
+       "VADDB",
+       "VADDH",
+       "VADDW",
+       "VADDV",
+       "VADDQ",
+       "XVADDB",
+       "XVADDH",
+       "XVADDW",
+       "XVADDV",
+       "XVADDQ",
+       "VSUBB",
+       "VSUBH",
+       "VSUBW",
+       "VSUBV",
+       "VSUBQ",
+       "XVSUBB",
+       "XVSUBH",
+       "XVSUBW",
+       "XVSUBV",
+       "XVSUBQ",
+       "VADDBU",
+       "VADDHU",
+       "VADDWU",
+       "VADDVU",
+       "VSUBBU",
+       "VSUBHU",
+       "VSUBWU",
+       "VSUBVU",
+       "XVADDBU",
+       "XVADDHU",
+       "XVADDWU",
+       "XVADDVU",
+       "XVSUBBU",
+       "XVSUBHU",
+       "XVSUBWU",
+       "XVSUBVU",
        "VPCNTB",
        "VPCNTH",
        "VPCNTW",
index 96c0e7b8907b06b564e17e1589c0104f86b7dc86..3ede02210443b1564f63145bfd39e87b274e3b60 100644 (file)
@@ -89,6 +89,11 @@ var optab = []Optab{
        {AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
        {AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
 
+       {AVADDB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
+       {AVADDB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
+       {AXVADDB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
+       {AXVADDB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
+
        {AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
        {AVSLLB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
        {AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
@@ -1550,6 +1555,28 @@ func buildop(ctxt *obj.Link) {
                        opset(AXVPCNTW, r0)
                        opset(AXVPCNTV, r0)
 
+               case AVADDB:
+                       opset(AVADDH, r0)
+                       opset(AVADDW, r0)
+                       opset(AVADDV, r0)
+                       opset(AVADDQ, r0)
+                       opset(AVSUBB, r0)
+                       opset(AVSUBH, r0)
+                       opset(AVSUBW, r0)
+                       opset(AVSUBV, r0)
+                       opset(AVSUBQ, r0)
+
+               case AXVADDB:
+                       opset(AXVADDH, r0)
+                       opset(AXVADDW, r0)
+                       opset(AXVADDV, r0)
+                       opset(AXVADDQ, r0)
+                       opset(AXVSUBB, r0)
+                       opset(AXVSUBH, r0)
+                       opset(AXVSUBW, r0)
+                       opset(AXVSUBV, r0)
+                       opset(AXVSUBQ, r0)
+
                case AVSLLB:
                        opset(AVSRLB, r0)
                        opset(AVSRAB, r0)
@@ -1574,11 +1601,27 @@ func buildop(ctxt *obj.Link) {
                        opset(AVSRLW, r0)
                        opset(AVSRAW, r0)
                        opset(AVROTRW, r0)
+                       opset(AVADDBU, r0)
+                       opset(AVADDHU, r0)
+                       opset(AVADDWU, r0)
+                       opset(AVADDVU, r0)
+                       opset(AVSUBBU, r0)
+                       opset(AVSUBHU, r0)
+                       opset(AVSUBWU, r0)
+                       opset(AVSUBVU, r0)
 
                case AXVSLLW:
                        opset(AXVSRLW, r0)
                        opset(AXVSRAW, r0)
                        opset(AXVROTRW, r0)
+                       opset(AXVADDBU, r0)
+                       opset(AXVADDHU, r0)
+                       opset(AXVADDWU, r0)
+                       opset(AXVADDVU, r0)
+                       opset(AXVSUBBU, r0)
+                       opset(AXVSUBHU, r0)
+                       opset(AXVSUBWU, r0)
+                       opset(AXVSUBVU, r0)
 
                case AVSLLV:
                        opset(AVSRLV, r0)
@@ -2779,6 +2822,46 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
                return 0xe9de << 15 // xvrotr.w
        case AXVROTRV:
                return 0xe9df << 15 // xvrotr.d
+       case AVADDB:
+               return 0xe014 << 15 // vadd.b
+       case AVADDH:
+               return 0xe015 << 15 // vadd.h
+       case AVADDW:
+               return 0xe016 << 15 // vadd.w
+       case AVADDV:
+               return 0xe017 << 15 // vadd.d
+       case AVADDQ:
+               return 0xe25a << 15 // vadd.q
+       case AVSUBB:
+               return 0xe018 << 15 // vsub.b
+       case AVSUBH:
+               return 0xe019 << 15 // vsub.h
+       case AVSUBW:
+               return 0xe01a << 15 // vsub.w
+       case AVSUBV:
+               return 0xe01b << 15 // vsub.d
+       case AVSUBQ:
+               return 0xe25b << 15 // vsub.q
+       case AXVADDB:
+               return 0xe814 << 15 // xvadd.b
+       case AXVADDH:
+               return 0xe815 << 15 // xvadd.h
+       case AXVADDW:
+               return 0xe816 << 15 // xvadd.w
+       case AXVADDV:
+               return 0xe817 << 15 // xvadd.d
+       case AXVADDQ:
+               return 0xea5a << 15 // xvadd.q
+       case AXVSUBB:
+               return 0xe818 << 15 // xvsub.b
+       case AXVSUBH:
+               return 0xe819 << 15 // xvsub.h
+       case AXVSUBW:
+               return 0xe81a << 15 // xvsub.w
+       case AXVSUBV:
+               return 0xe81b << 15 // xvsub.d
+       case AXVSUBQ:
+               return 0xea5b << 15 // xvsub.q
        }
 
        if a < 0 {
@@ -3170,6 +3253,38 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
                return 0x1dcd<<18 | 0x1<<15 // xvsrai.w
        case AXVSRAV:
                return 0x1dcd<<18 | 0x1<<16 // xvsrai.d
+       case AVADDBU:
+               return 0xe514 << 15 // vaddi.bu
+       case AVADDHU:
+               return 0xe515 << 15 // vaddi.hu
+       case AVADDWU:
+               return 0xe516 << 15 // vaddi.wu
+       case AVADDVU:
+               return 0xe517 << 15 // vaddi.du
+       case AVSUBBU:
+               return 0xe518 << 15 // vsubi.bu
+       case AVSUBHU:
+               return 0xe519 << 15 // vsubi.hu
+       case AVSUBWU:
+               return 0xe51a << 15 // vsubi.wu
+       case AVSUBVU:
+               return 0xe51b << 15 // vsubi.du
+       case AXVADDBU:
+               return 0xed14 << 15 // xvaddi.bu
+       case AXVADDHU:
+               return 0xed15 << 15 // xvaddi.hu
+       case AXVADDWU:
+               return 0xed16 << 15 // xvaddi.wu
+       case AXVADDVU:
+               return 0xed17 << 15 // xvaddi.du
+       case AXVSUBBU:
+               return 0xed18 << 15 // xvsubi.bu
+       case AXVSUBHU:
+               return 0xed19 << 15 // xvsubi.hu
+       case AXVSUBWU:
+               return 0xed1a << 15 // xvsubi.wu
+       case AXVSUBVU:
+               return 0xed1b << 15 // xvsubi.du
        }
 
        if a < 0 {