]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add {,X}V{ADD,SUB}W{EV,OD}.{H.B,W.H,D.W,Q.D}{,U} instructio...
author: Guoqi Chen <chenguoqi@loongson.cn>
Mon, 24 Nov 2025 04:56:15 +0000 (12:56 +0800)
committer: abner chenc <chenguoqi@loongson.cn>
Wed, 26 Nov 2025 00:50:48 +0000 (16:50 -0800)
Go asm syntax:
 VADDWEV{HB, WH, VW, QV}{,U}        V1, V2, V3
 VSUBWEV{HB, WH, VW, QV}{,U}        V1, V2, V3
 VADDWOD{HB, WH, VW, QV}{,U}        V1, V2, V3
 VSUBWOD{HB, WH, VW, QV}{,U}        V1, V2, V3
XVADDWEV{HB, WH, VW, QV}{,U}        X1, X2, X3
XVSUBWEV{HB, WH, VW, QV}{,U}        X1, X2, X3
XVADDWOD{HB, WH, VW, QV}{,U}        X1, X2, X3
XVSUBWOD{HB, WH, VW, QV}{,U}        X1, X2, X3

Equivalent platform assembler syntax:
 vaddwev.{h.b, w.h, d.w, q.d}{,u}   V3, V2, V1
 vsubwev.{h.b, w.h, d.w, q.d}{,u}   V3, V2, V1
 vaddwod.{h.b, w.h, d.w, q.d}{,u}   V3, V2, V1
 vsubwod.{h.b, w.h, d.w, q.d}{,u}   V3, V2, V1
xvaddwev.{h.b, w.h, d.w, q.d}{,u}   X3, X2, X1
xvsubwev.{h.b, w.h, d.w, q.d}{,u}   X3, X2, X1
xvaddwod.{h.b, w.h, d.w, q.d}{,u}   X3, X2, X1
xvsubwod.{h.b, w.h, d.w, q.d}{,u}   X3, X2, X1

Change-Id: I407dc65b32b89844fd303e265a99d8aafdf922ec
Reviewed-on: https://go-review.googlesource.com/c/go/+/723620
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Mark Freeman <markfreeman@google.com>
src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go

index b3fcf7db15e02a052b0ed3af022dc66a7be6ae6d..fc6e27741690889782a4eaf985a407a19b5a055f 100644 (file)
@@ -1075,6 +1075,78 @@ lable2:
        XVMULWODVWUW    X1, X2, X3      // 4304a374
        XVMULWODQVUV    X1, X2, X3      // 4384a374
 
+       // [X]VADDW{EV/OD}.{H.B/W.H/D.W/Q.D} instructions
+       VADDWEVHB       V1, V2, V3      // 43041e70
+       VADDWEVWH       V1, V2, V3      // 43841e70
+       VADDWEVVW       V1, V2, V3      // 43041f70
+       VADDWEVQV       V1, V2, V3      // 43841f70
+       VADDWODHB       V1, V2, V3      // 43042270
+       VADDWODWH       V1, V2, V3      // 43842270
+       VADDWODVW       V1, V2, V3      // 43042370
+       VADDWODQV       V1, V2, V3      // 43842370
+       XVADDWEVHB      X1, X2, X3      // 43041e74
+       XVADDWEVWH      X1, X2, X3      // 43841e74
+       XVADDWEVVW      X1, X2, X3      // 43041f74
+       XVADDWEVQV      X1, X2, X3      // 43841f74
+       XVADDWODHB      X1, X2, X3      // 43042274
+       XVADDWODWH      X1, X2, X3      // 43842274
+       XVADDWODVW      X1, X2, X3      // 43042374
+       XVADDWODQV      X1, X2, X3      // 43842374
+
+       // [X]VSUBW{EV/OD}.{H.B/W.H/D.W/Q.D} instructions
+       VSUBWEVHB       V1, V2, V3      // 43042070
+       VSUBWEVWH       V1, V2, V3      // 43842070
+       VSUBWEVVW       V1, V2, V3      // 43042170
+       VSUBWEVQV       V1, V2, V3      // 43842170
+       VSUBWODHB       V1, V2, V3      // 43042470
+       VSUBWODWH       V1, V2, V3      // 43842470
+       VSUBWODVW       V1, V2, V3      // 43042570
+       VSUBWODQV       V1, V2, V3      // 43842570
+       XVSUBWEVHB      X1, X2, X3      // 43042074
+       XVSUBWEVWH      X1, X2, X3      // 43842074
+       XVSUBWEVVW      X1, X2, X3      // 43042174
+       XVSUBWEVQV      X1, X2, X3      // 43842174
+       XVSUBWODHB      X1, X2, X3      // 43042474
+       XVSUBWODWH      X1, X2, X3      // 43842474
+       XVSUBWODVW      X1, X2, X3      // 43042574
+       XVSUBWODQV      X1, X2, X3      // 43842574
+
+       // [X]VADDW{EV/OD}.{H.B/W.H/D.W/Q.D}U instructions
+       VADDWEVHBU      V1, V2, V3      // 43042e70
+       VADDWEVWHU      V1, V2, V3      // 43842e70
+       VADDWEVVWU      V1, V2, V3      // 43042f70
+       VADDWEVQVU      V1, V2, V3      // 43842f70
+       VADDWODHBU      V1, V2, V3      // 43043270
+       VADDWODWHU      V1, V2, V3      // 43843270
+       VADDWODVWU      V1, V2, V3      // 43043370
+       VADDWODQVU      V1, V2, V3      // 43843370
+       XVADDWEVHBU     X1, X2, X3      // 43042e74
+       XVADDWEVWHU     X1, X2, X3      // 43842e74
+       XVADDWEVVWU     X1, X2, X3      // 43042f74
+       XVADDWEVQVU     X1, X2, X3      // 43842f74
+       XVADDWODHBU     X1, X2, X3      // 43043274
+       XVADDWODWHU     X1, X2, X3      // 43843274
+       XVADDWODVWU     X1, X2, X3      // 43043374
+       XVADDWODQVU     X1, X2, X3      // 43843374
+
+       // [X]VSUBW{EV/OD}.{H.B/W.H/D.W/Q.D}U instructions
+       VSUBWEVHBU      V1, V2, V3      // 43043070
+       VSUBWEVWHU      V1, V2, V3      // 43843070
+       VSUBWEVVWU      V1, V2, V3      // 43043170
+       VSUBWEVQVU      V1, V2, V3      // 43843170
+       VSUBWODHBU      V1, V2, V3      // 43043470
+       VSUBWODWHU      V1, V2, V3      // 43843470
+       VSUBWODVWU      V1, V2, V3      // 43043570
+       VSUBWODQVU      V1, V2, V3      // 43843570
+       XVSUBWEVHBU     X1, X2, X3      // 43043074
+       XVSUBWEVWHU     X1, X2, X3      // 43843074
+       XVSUBWEVVWU     X1, X2, X3      // 43043174
+       XVSUBWEVQVU     X1, X2, X3      // 43843174
+       XVSUBWODHBU     X1, X2, X3      // 43043474
+       XVSUBWODWHU     X1, X2, X3      // 43843474
+       XVSUBWODVWU     X1, X2, X3      // 43043574
+       XVSUBWODQVU     X1, X2, X3      // 43843574
+
        // [X]VSHUF4I.{B/H/W/D} instructions
        VSHUF4IB        $0, V2, V1      // 41009073
        VSHUF4IB        $16, V2, V1     // 41409073
index 2eabe9bda8ae6669ff07ec33936a279b663cf56e..96f0889199dc19bedfa0b815fd03103382efcf70 100644 (file)
@@ -1159,6 +1159,71 @@ const (
        AXVMULWODVWUW
        AXVMULWODQVUV
 
+       AVADDWEVHB
+       AVADDWEVWH
+       AVADDWEVVW
+       AVADDWEVQV
+       AVSUBWEVHB
+       AVSUBWEVWH
+       AVSUBWEVVW
+       AVSUBWEVQV
+       AVADDWODHB
+       AVADDWODWH
+       AVADDWODVW
+       AVADDWODQV
+       AVSUBWODHB
+       AVSUBWODWH
+       AVSUBWODVW
+       AVSUBWODQV
+       AXVADDWEVHB
+       AXVADDWEVWH
+       AXVADDWEVVW
+       AXVADDWEVQV
+       AXVSUBWEVHB
+       AXVSUBWEVWH
+       AXVSUBWEVVW
+       AXVSUBWEVQV
+       AXVADDWODHB
+       AXVADDWODWH
+       AXVADDWODVW
+       AXVADDWODQV
+       AXVSUBWODHB
+       AXVSUBWODWH
+       AXVSUBWODVW
+       AXVSUBWODQV
+       AVADDWEVHBU
+       AVADDWEVWHU
+       AVADDWEVVWU
+       AVADDWEVQVU
+       AVSUBWEVHBU
+       AVSUBWEVWHU
+       AVSUBWEVVWU
+       AVSUBWEVQVU
+       AVADDWODHBU
+       AVADDWODWHU
+       AVADDWODVWU
+       AVADDWODQVU
+       AVSUBWODHBU
+       AVSUBWODWHU
+       AVSUBWODVWU
+       AVSUBWODQVU
+       AXVADDWEVHBU
+       AXVADDWEVWHU
+       AXVADDWEVVWU
+       AXVADDWEVQVU
+       AXVSUBWEVHBU
+       AXVSUBWEVWHU
+       AXVSUBWEVVWU
+       AXVSUBWEVQVU
+       AXVADDWODHBU
+       AXVADDWODWHU
+       AXVADDWODVWU
+       AXVADDWODQVU
+       AXVSUBWODHBU
+       AXVSUBWODWHU
+       AXVSUBWODVWU
+       AXVSUBWODQVU
+
        AVSHUF4IB
        AVSHUF4IH
        AVSHUF4IW
index 92e3cab950f8daa45ea77253a20151b89cf2e9eb..0ee911401f03698f333ca0de2f912ba0f35d6496 100644 (file)
@@ -628,6 +628,70 @@ var Anames = []string{
        "XVMULWODWHUH",
        "XVMULWODVWUW",
        "XVMULWODQVUV",
+       "VADDWEVHB",
+       "VADDWEVWH",
+       "VADDWEVVW",
+       "VADDWEVQV",
+       "VSUBWEVHB",
+       "VSUBWEVWH",
+       "VSUBWEVVW",
+       "VSUBWEVQV",
+       "VADDWODHB",
+       "VADDWODWH",
+       "VADDWODVW",
+       "VADDWODQV",
+       "VSUBWODHB",
+       "VSUBWODWH",
+       "VSUBWODVW",
+       "VSUBWODQV",
+       "XVADDWEVHB",
+       "XVADDWEVWH",
+       "XVADDWEVVW",
+       "XVADDWEVQV",
+       "XVSUBWEVHB",
+       "XVSUBWEVWH",
+       "XVSUBWEVVW",
+       "XVSUBWEVQV",
+       "XVADDWODHB",
+       "XVADDWODWH",
+       "XVADDWODVW",
+       "XVADDWODQV",
+       "XVSUBWODHB",
+       "XVSUBWODWH",
+       "XVSUBWODVW",
+       "XVSUBWODQV",
+       "VADDWEVHBU",
+       "VADDWEVWHU",
+       "VADDWEVVWU",
+       "VADDWEVQVU",
+       "VSUBWEVHBU",
+       "VSUBWEVWHU",
+       "VSUBWEVVWU",
+       "VSUBWEVQVU",
+       "VADDWODHBU",
+       "VADDWODWHU",
+       "VADDWODVWU",
+       "VADDWODQVU",
+       "VSUBWODHBU",
+       "VSUBWODWHU",
+       "VSUBWODVWU",
+       "VSUBWODQVU",
+       "XVADDWEVHBU",
+       "XVADDWEVWHU",
+       "XVADDWEVVWU",
+       "XVADDWEVQVU",
+       "XVSUBWEVHBU",
+       "XVSUBWEVWHU",
+       "XVSUBWEVVWU",
+       "XVSUBWEVQVU",
+       "XVADDWODHBU",
+       "XVADDWODWHU",
+       "XVADDWODVWU",
+       "XVADDWODQVU",
+       "XVSUBWODHBU",
+       "XVSUBWODWHU",
+       "XVSUBWODVWU",
+       "XVSUBWODQVU",
        "VSHUF4IB",
        "VSHUF4IH",
        "VSHUF4IW",
index 857ef31ca3ac1af2e49cef8b4064e19dcd2ec969..9aff344931dcaa69a0c34e9144dd6e93d99ac1bb 100644 (file)
@@ -1791,6 +1791,38 @@ func buildop(ctxt *obj.Link) {
                        opset(AVSLTHU, r0)
                        opset(AVSLTWU, r0)
                        opset(AVSLTVU, r0)
+                       opset(AVADDWEVHB, r0)
+                       opset(AVADDWEVWH, r0)
+                       opset(AVADDWEVVW, r0)
+                       opset(AVADDWEVQV, r0)
+                       opset(AVSUBWEVHB, r0)
+                       opset(AVSUBWEVWH, r0)
+                       opset(AVSUBWEVVW, r0)
+                       opset(AVSUBWEVQV, r0)
+                       opset(AVADDWODHB, r0)
+                       opset(AVADDWODWH, r0)
+                       opset(AVADDWODVW, r0)
+                       opset(AVADDWODQV, r0)
+                       opset(AVSUBWODHB, r0)
+                       opset(AVSUBWODWH, r0)
+                       opset(AVSUBWODVW, r0)
+                       opset(AVSUBWODQV, r0)
+                       opset(AVADDWEVHBU, r0)
+                       opset(AVADDWEVWHU, r0)
+                       opset(AVADDWEVVWU, r0)
+                       opset(AVADDWEVQVU, r0)
+                       opset(AVSUBWEVHBU, r0)
+                       opset(AVSUBWEVWHU, r0)
+                       opset(AVSUBWEVVWU, r0)
+                       opset(AVSUBWEVQVU, r0)
+                       opset(AVADDWODHBU, r0)
+                       opset(AVADDWODWHU, r0)
+                       opset(AVADDWODVWU, r0)
+                       opset(AVADDWODQVU, r0)
+                       opset(AVSUBWODHBU, r0)
+                       opset(AVSUBWODWHU, r0)
+                       opset(AVSUBWODVWU, r0)
+                       opset(AVSUBWODQVU, r0)
 
                case AXVSLTB:
                        opset(AXVSLTH, r0)
@@ -1800,6 +1832,38 @@ func buildop(ctxt *obj.Link) {
                        opset(AXVSLTHU, r0)
                        opset(AXVSLTWU, r0)
                        opset(AXVSLTVU, r0)
+                       opset(AXVADDWEVHB, r0)
+                       opset(AXVADDWEVWH, r0)
+                       opset(AXVADDWEVVW, r0)
+                       opset(AXVADDWEVQV, r0)
+                       opset(AXVSUBWEVHB, r0)
+                       opset(AXVSUBWEVWH, r0)
+                       opset(AXVSUBWEVVW, r0)
+                       opset(AXVSUBWEVQV, r0)
+                       opset(AXVADDWODHB, r0)
+                       opset(AXVADDWODWH, r0)
+                       opset(AXVADDWODVW, r0)
+                       opset(AXVADDWODQV, r0)
+                       opset(AXVSUBWODHB, r0)
+                       opset(AXVSUBWODWH, r0)
+                       opset(AXVSUBWODVW, r0)
+                       opset(AXVSUBWODQV, r0)
+                       opset(AXVADDWEVHBU, r0)
+                       opset(AXVADDWEVWHU, r0)
+                       opset(AXVADDWEVVWU, r0)
+                       opset(AXVADDWEVQVU, r0)
+                       opset(AXVSUBWEVHBU, r0)
+                       opset(AXVSUBWEVWHU, r0)
+                       opset(AXVSUBWEVVWU, r0)
+                       opset(AXVSUBWEVQVU, r0)
+                       opset(AXVADDWODHBU, r0)
+                       opset(AXVADDWODWHU, r0)
+                       opset(AXVADDWODVWU, r0)
+                       opset(AXVADDWODQVU, r0)
+                       opset(AXVSUBWODHBU, r0)
+                       opset(AXVSUBWODWHU, r0)
+                       opset(AXVSUBWODVWU, r0)
+                       opset(AXVSUBWODQVU, r0)
 
                case AVANDB:
                        opset(AVORB, r0)
@@ -3612,6 +3676,134 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
                return 0xe946 << 15 // xvmulwod.d.wu.w
        case AXVMULWODQVUV:
                return 0xe947 << 15 // xvmulwod.q.du.d
+       case AVADDWEVHB:
+               return 0x0E03C << 15 // vaddwev.h.b
+       case AVADDWEVWH:
+               return 0x0E03D << 15 // vaddwev.w.h
+       case AVADDWEVVW:
+               return 0x0E03E << 15 // vaddwev.d.w
+       case AVADDWEVQV:
+               return 0x0E03F << 15 // vaddwev.q.d
+       case AVSUBWEVHB:
+               return 0x0E040 << 15 // vsubwev.h.b
+       case AVSUBWEVWH:
+               return 0x0E041 << 15 // vsubwev.w.h
+       case AVSUBWEVVW:
+               return 0x0E042 << 15 // vsubwev.d.w
+       case AVSUBWEVQV:
+               return 0x0E043 << 15 // vsubwev.q.d
+       case AVADDWODHB:
+               return 0x0E044 << 15 // vaddwod.h.b
+       case AVADDWODWH:
+               return 0x0E045 << 15 // vaddwod.w.h
+       case AVADDWODVW:
+               return 0x0E046 << 15 // vaddwod.d.w
+       case AVADDWODQV:
+               return 0x0E047 << 15 // vaddwod.q.d
+       case AVSUBWODHB:
+               return 0x0E048 << 15 // vsubwod.h.b
+       case AVSUBWODWH:
+               return 0x0E049 << 15 // vsubwod.w.h
+       case AVSUBWODVW:
+               return 0x0E04A << 15 // vsubwod.d.w
+       case AVSUBWODQV:
+               return 0x0E04B << 15 // vsubwod.q.d
+       case AXVADDWEVHB:
+               return 0x0E83C << 15 // xvaddwev.h.b
+       case AXVADDWEVWH:
+               return 0x0E83D << 15 // xvaddwev.w.h
+       case AXVADDWEVVW:
+               return 0x0E83E << 15 // xvaddwev.d.w
+       case AXVADDWEVQV:
+               return 0x0E83F << 15 // xvaddwev.q.d
+       case AXVSUBWEVHB:
+               return 0x0E840 << 15 // xvsubwev.h.b
+       case AXVSUBWEVWH:
+               return 0x0E841 << 15 // xvsubwev.w.h
+       case AXVSUBWEVVW:
+               return 0x0E842 << 15 // xvsubwev.d.w
+       case AXVSUBWEVQV:
+               return 0x0E843 << 15 // xvsubwev.q.d
+       case AXVADDWODHB:
+               return 0x0E844 << 15 // xvaddwod.h.b
+       case AXVADDWODWH:
+               return 0x0E845 << 15 // xvaddwod.w.h
+       case AXVADDWODVW:
+               return 0x0E846 << 15 // xvaddwod.d.w
+       case AXVADDWODQV:
+               return 0x0E847 << 15 // xvaddwod.q.d
+       case AXVSUBWODHB:
+               return 0x0E848 << 15 // xvsubwod.h.b
+       case AXVSUBWODWH:
+               return 0x0E849 << 15 // xvsubwod.w.h
+       case AXVSUBWODVW:
+               return 0x0E84A << 15 // xvsubwod.d.w
+       case AXVSUBWODQV:
+               return 0x0E84B << 15 // xvsubwod.q.d
+       case AVADDWEVHBU:
+               return 0x0E05C << 15 // vaddwev.h.bu
+       case AVADDWEVWHU:
+               return 0x0E05E << 15 // vaddwev.w.hu
+       case AVADDWEVVWU:
+               return 0x0E05E << 15 // vaddwev.d.wu
+       case AVADDWEVQVU:
+               return 0x0E05F << 15 // vaddwev.q.du
+       case AVSUBWEVHBU:
+               return 0x0E060 << 15 // vsubwev.h.bu
+       case AVSUBWEVWHU:
+               return 0x0E061 << 15 // vsubwev.w.hu
+       case AVSUBWEVVWU:
+               return 0x0E062 << 15 // vsubwev.d.wu
+       case AVSUBWEVQVU:
+               return 0x0E063 << 15 // vsubwev.q.du
+       case AVADDWODHBU:
+               return 0x0E064 << 15 // vaddwod.h.bu
+       case AVADDWODWHU:
+               return 0x0E065 << 15 // vaddwod.w.hu
+       case AVADDWODVWU:
+               return 0x0E066 << 15 // vaddwod.d.wu
+       case AVADDWODQVU:
+               return 0x0E067 << 15 // vaddwod.q.du
+       case AVSUBWODHBU:
+               return 0x0E068 << 15 // vsubwod.h.bu
+       case AVSUBWODWHU:
+               return 0x0E069 << 15 // vsubwod.w.hu
+       case AVSUBWODVWU:
+               return 0x0E06A << 15 // vsubwod.d.wu
+       case AVSUBWODQVU:
+               return 0x0E06B << 15 // vsubwod.q.du
+       case AXVADDWEVHBU:
+               return 0x0E85C << 15 // xvaddwev.h.bu
+       case AXVADDWEVWHU:
+               return 0x0E85D << 15 // xvaddwev.w.hu
+       case AXVADDWEVVWU:
+               return 0x0E85E << 15 // xvaddwev.d.wu
+       case AXVADDWEVQVU:
+               return 0x0E85F << 15 // xvaddwev.q.du
+       case AXVSUBWEVHBU:
+               return 0x0E860 << 15 // xvsubwev.h.bu
+       case AXVSUBWEVWHU:
+               return 0x0E861 << 15 // xvsubwev.w.hu
+       case AXVSUBWEVVWU:
+               return 0x0E862 << 15 // xvsubwev.d.wu
+       case AXVSUBWEVQVU:
+               return 0x0E863 << 15 // xvsubwev.q.du
+       case AXVADDWODHBU:
+               return 0x0E864 << 15 // xvaddwod.h.bu
+       case AXVADDWODWHU:
+               return 0x0E865 << 15 // xvaddwod.w.hu
+       case AXVADDWODVWU:
+               return 0x0E866 << 15 // xvaddwod.d.wu
+       case AXVADDWODQVU:
+               return 0x0E867 << 15 // xvaddwod.q.du
+       case AXVSUBWODHBU:
+               return 0x0E868 << 15 // xvsubwod.h.bu
+       case AXVSUBWODWHU:
+               return 0x0E869 << 15 // xvsubwod.w.hu
+       case AXVSUBWODVWU:
+               return 0x0E86A << 15 // xvsubwod.d.wu
+       case AXVSUBWODQVU:
+               return 0x0E86B << 15 // xvsubwod.q.du
        case AVSLLB:
                return 0xe1d0 << 15 // vsll.b
        case AVSLLH: