]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/loong64: add [X]VMULW{EV/OD} series instructions support
authorXiaolin Zhao <zhaoxiaolin@loongson.cn>
Mon, 17 Mar 2025 06:48:01 +0000 (14:48 +0800)
committerabner chenc <chenguoqi@loongson.cn>
Tue, 25 Mar 2025 12:49:58 +0000 (05:49 -0700)
Go asm syntax:
 VMULW{EV/OD}{HB/WH/VW/QV}[U] VK, VJ, VD
XVMULW{EV/OD}{HB/WH/VW/QV}[U] XK, XJ, XD
 VMULW{EV/OD}{HBUB/WHUH/VWUW/QVUV} VK, VJ, VD
XVMULW{EV/OD}{HBUB/WHUH/VWUW/QVUV} XK, XJ, XD

Equivalent platform assembler syntax:
 vmulw{ev/od}.{h.b/w.h/d.w/q.d}[u] vd, vj, vk
xvmulw{ev/od}.{h.b/w.h/d.w/q.d}[u] xd, xj, xk
 vmulw{ev/od}.{h.bu.b/w.hu.h/d.wu.w/q.du.d} vd, vj, vk
xvmulw{ev/od}.{h.bu.b/w.hu.h/d.wu.w/q.du.d} xd, xj, xk

Change-Id: Ib1b5fb9605417a2b81841deae40e0e2beb90d03c
Reviewed-on: https://go-review.googlesource.com/c/go/+/658375
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/asm/internal/asm/testdata/loong64enc1.s
src/cmd/internal/obj/loong64/a.out.go
src/cmd/internal/obj/loong64/anames.go
src/cmd/internal/obj/loong64/asm.go

index 8da4824dbc6ea7a368e04acb894de2321dd44142..d6a0762aa8df780af3620be92d8cb9f3510cdc06 100644 (file)
@@ -859,3 +859,55 @@ lable2:
        XVNEGH          X2, X1          // 41349c76
        XVNEGW          X2, X1          // 41389c76
        XVNEGV          X2, X1          // 413c9c76
+
+       // [X]{VMULW}{EV/OD}.{H.B/W.H/D.W/Q.D}[U] instructions
+       VMULWEVHB       V1, V2, V3      // 43049070
+       VMULWEVWH       V1, V2, V3      // 43849070
+       VMULWEVVW       V1, V2, V3      // 43049170
+       VMULWEVQV       V1, V2, V3      // 43849170
+       VMULWODHB       V1, V2, V3      // 43049270
+       VMULWODWH       V1, V2, V3      // 43849270
+       VMULWODVW       V1, V2, V3      // 43049370
+       VMULWODQV       V1, V2, V3      // 43849370
+       VMULWEVHBU      V1, V2, V3      // 43049870
+       VMULWEVWHU      V1, V2, V3      // 43849870
+       VMULWEVVWU      V1, V2, V3      // 43049970
+       VMULWEVQVU      V1, V2, V3      // 43849970
+       VMULWODHBU      V1, V2, V3      // 43049a70
+       VMULWODWHU      V1, V2, V3      // 43849a70
+       VMULWODVWU      V1, V2, V3      // 43049b70
+       VMULWODQVU      V1, V2, V3      // 43849b70
+       XVMULWEVHB      X1, X2, X3      // 43049074
+       XVMULWEVWH      X1, X2, X3      // 43849074
+       XVMULWEVVW      X1, X2, X3      // 43049174
+       XVMULWEVQV      X1, X2, X3      // 43849174
+       XVMULWODHB      X1, X2, X3      // 43049274
+       XVMULWODWH      X1, X2, X3      // 43849274
+       XVMULWODVW      X1, X2, X3      // 43049374
+       XVMULWODQV      X1, X2, X3      // 43849374
+       XVMULWEVHBU     X1, X2, X3      // 43049874
+       XVMULWEVWHU     X1, X2, X3      // 43849874
+       XVMULWEVVWU     X1, X2, X3      // 43049974
+       XVMULWEVQVU     X1, X2, X3      // 43849974
+       XVMULWODHBU     X1, X2, X3      // 43049a74
+       XVMULWODWHU     X1, X2, X3      // 43849a74
+       XVMULWODVWU     X1, X2, X3      // 43049b74
+       XVMULWODQVU     X1, X2, X3      // 43849b74
+
+       // [X]{VMULW}{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D} instructions
+       VMULWEVHBUB     V1, V2, V3      // 4304a070
+       VMULWEVWHUH     V1, V2, V3      // 4384a070
+       VMULWEVVWUW     V1, V2, V3      // 4304a170
+       VMULWEVQVUV     V1, V2, V3      // 4384a170
+       VMULWODHBUB     V1, V2, V3      // 4304a270
+       VMULWODWHUH     V1, V2, V3      // 4384a270
+       VMULWODVWUW     V1, V2, V3      // 4304a370
+       VMULWODQVUV     V1, V2, V3      // 4384a370
+       XVMULWEVHBUB    X1, X2, X3      // 4304a074
+       XVMULWEVWHUH    X1, X2, X3      // 4384a074
+       XVMULWEVVWUW    X1, X2, X3      // 4304a174
+       XVMULWEVQVUV    X1, X2, X3      // 4384a174
+       XVMULWODHBUB    X1, X2, X3      // 4304a274
+       XVMULWODWHUH    X1, X2, X3      // 4384a274
+       XVMULWODVWUW    X1, X2, X3      // 4304a374
+       XVMULWODQVUV    X1, X2, X3      // 4384a374
index 7eaf5c0ce1cd0746d894c0fd0c5e4ec9b05ccb37..782691754feff3c97a0dd64ceb2fd4aad03ae99c 100644 (file)
@@ -960,6 +960,56 @@ const (
        AXVNEGW
        AXVNEGV
 
+       // LSX and LASX mul instructions that operate on even or odd positions
+       AVMULWEVHB
+       AVMULWEVWH
+       AVMULWEVVW
+       AVMULWEVQV
+       AVMULWODHB
+       AVMULWODWH
+       AVMULWODVW
+       AVMULWODQV
+       AVMULWEVHBU
+       AVMULWEVWHU
+       AVMULWEVVWU
+       AVMULWEVQVU
+       AVMULWODHBU
+       AVMULWODWHU
+       AVMULWODVWU
+       AVMULWODQVU
+       AXVMULWEVHB
+       AXVMULWEVWH
+       AXVMULWEVVW
+       AXVMULWEVQV
+       AXVMULWODHB
+       AXVMULWODWH
+       AXVMULWODVW
+       AXVMULWODQV
+       AXVMULWEVHBU
+       AXVMULWEVWHU
+       AXVMULWEVVWU
+       AXVMULWEVQVU
+       AXVMULWODHBU
+       AXVMULWODWHU
+       AXVMULWODVWU
+       AXVMULWODQVU
+       AVMULWEVHBUB
+       AVMULWEVWHUH
+       AVMULWEVVWUW
+       AVMULWEVQVUV
+       AVMULWODHBUB
+       AVMULWODWHUH
+       AVMULWODVWUW
+       AVMULWODQVUV
+       AXVMULWEVHBUB
+       AXVMULWEVWHUH
+       AXVMULWEVVWUW
+       AXVMULWEVQVUV
+       AXVMULWODHBUB
+       AXVMULWODWHUH
+       AXVMULWODVWUW
+       AXVMULWODQVUV
+
        ALAST
 
        // aliases
index eae240d5fde1e539aa0d129b1152ea2790a92558..887ada8a170c96446d68cdd279aa68bd93b282a9 100644 (file)
@@ -457,5 +457,53 @@ var Anames = []string{
        "XVNEGH",
        "XVNEGW",
        "XVNEGV",
+       "VMULWEVHB",
+       "VMULWEVWH",
+       "VMULWEVVW",
+       "VMULWEVQV",
+       "VMULWODHB",
+       "VMULWODWH",
+       "VMULWODVW",
+       "VMULWODQV",
+       "VMULWEVHBU",
+       "VMULWEVWHU",
+       "VMULWEVVWU",
+       "VMULWEVQVU",
+       "VMULWODHBU",
+       "VMULWODWHU",
+       "VMULWODVWU",
+       "VMULWODQVU",
+       "XVMULWEVHB",
+       "XVMULWEVWH",
+       "XVMULWEVVW",
+       "XVMULWEVQV",
+       "XVMULWODHB",
+       "XVMULWODWH",
+       "XVMULWODVW",
+       "XVMULWODQV",
+       "XVMULWEVHBU",
+       "XVMULWEVWHU",
+       "XVMULWEVVWU",
+       "XVMULWEVQVU",
+       "XVMULWODHBU",
+       "XVMULWODWHU",
+       "XVMULWODVWU",
+       "XVMULWODQVU",
+       "VMULWEVHBUB",
+       "VMULWEVWHUH",
+       "VMULWEVVWUW",
+       "VMULWEVQVUV",
+       "VMULWODHBUB",
+       "VMULWODWHUH",
+       "VMULWODVWUW",
+       "VMULWODQVUV",
+       "XVMULWEVHBUB",
+       "XVMULWEVWHUH",
+       "XVMULWEVVWUW",
+       "XVMULWEVQVUV",
+       "XVMULWODHBUB",
+       "XVMULWODWHUH",
+       "XVMULWODVWUW",
+       "XVMULWODQVUV",
        "LAST",
 }
index f128dc16e9d316756c3894cbdac5f16b298bb318..f0e3cd77b7f60578de577d3cfafa376b26499fa8 100644 (file)
@@ -1583,6 +1583,30 @@ func buildop(ctxt *obj.Link) {
                        opset(AVMODHU, r0)
                        opset(AVMODWU, r0)
                        opset(AVMODVU, r0)
+                       opset(AVMULWEVHB, r0)
+                       opset(AVMULWEVWH, r0)
+                       opset(AVMULWEVVW, r0)
+                       opset(AVMULWEVQV, r0)
+                       opset(AVMULWODHB, r0)
+                       opset(AVMULWODWH, r0)
+                       opset(AVMULWODVW, r0)
+                       opset(AVMULWODQV, r0)
+                       opset(AVMULWEVHBU, r0)
+                       opset(AVMULWEVWHU, r0)
+                       opset(AVMULWEVVWU, r0)
+                       opset(AVMULWEVQVU, r0)
+                       opset(AVMULWODHBU, r0)
+                       opset(AVMULWODWHU, r0)
+                       opset(AVMULWODVWU, r0)
+                       opset(AVMULWODQVU, r0)
+                       opset(AVMULWEVHBUB, r0)
+                       opset(AVMULWEVWHUH, r0)
+                       opset(AVMULWEVVWUW, r0)
+                       opset(AVMULWEVQVUV, r0)
+                       opset(AVMULWODHBUB, r0)
+                       opset(AVMULWODWHUH, r0)
+                       opset(AVMULWODVWUW, r0)
+                       opset(AVMULWODQVUV, r0)
 
                case AXVSEQB:
                        opset(AXVSEQH, r0)
@@ -1624,6 +1648,30 @@ func buildop(ctxt *obj.Link) {
                        opset(AXVMODHU, r0)
                        opset(AXVMODWU, r0)
                        opset(AXVMODVU, r0)
+                       opset(AXVMULWEVHB, r0)
+                       opset(AXVMULWEVWH, r0)
+                       opset(AXVMULWEVVW, r0)
+                       opset(AXVMULWEVQV, r0)
+                       opset(AXVMULWODHB, r0)
+                       opset(AXVMULWODWH, r0)
+                       opset(AXVMULWODVW, r0)
+                       opset(AXVMULWODQV, r0)
+                       opset(AXVMULWEVHBU, r0)
+                       opset(AXVMULWEVWHU, r0)
+                       opset(AXVMULWEVVWU, r0)
+                       opset(AXVMULWEVQVU, r0)
+                       opset(AXVMULWODHBU, r0)
+                       opset(AXVMULWODWHU, r0)
+                       opset(AXVMULWODVWU, r0)
+                       opset(AXVMULWODQVU, r0)
+                       opset(AXVMULWEVHBUB, r0)
+                       opset(AXVMULWEVWHUH, r0)
+                       opset(AXVMULWEVVWUW, r0)
+                       opset(AXVMULWEVQVUV, r0)
+                       opset(AXVMULWODHBUB, r0)
+                       opset(AXVMULWODWHUH, r0)
+                       opset(AXVMULWODVWUW, r0)
+                       opset(AXVMULWODQVUV, r0)
 
                case AVANDB:
                        opset(AVORB, r0)
@@ -2990,6 +3038,102 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
                return 0xe9ce << 15 // xvmod.wu
        case AXVMODVU:
                return 0xe9cf << 15 // xvmod.du
+       case AVMULWEVHB:
+               return 0xe120 << 15 // vmulwev.h.b
+       case AVMULWEVWH:
+               return 0xe121 << 15 // vmulwev.w.h
+       case AVMULWEVVW:
+               return 0xe122 << 15 // vmulwev.d.w
+       case AVMULWEVQV:
+               return 0xe123 << 15 // vmulwev.q.d
+       case AVMULWODHB:
+               return 0xe124 << 15 // vmulwod.h.b
+       case AVMULWODWH:
+               return 0xe125 << 15 // vmulwod.w.h
+       case AVMULWODVW:
+               return 0xe126 << 15 // vmulwod.d.w
+       case AVMULWODQV:
+               return 0xe127 << 15 // vmulwod.q.d
+       case AVMULWEVHBU:
+               return 0xe130 << 15 // vmulwev.h.bu
+       case AVMULWEVWHU:
+               return 0xe131 << 15 // vmulwev.w.hu
+       case AVMULWEVVWU:
+               return 0xe132 << 15 // vmulwev.d.wu
+       case AVMULWEVQVU:
+               return 0xe133 << 15 // vmulwev.q.du
+       case AVMULWODHBU:
+               return 0xe134 << 15 // vmulwod.h.bu
+       case AVMULWODWHU:
+               return 0xe135 << 15 // vmulwod.w.hu
+       case AVMULWODVWU:
+               return 0xe136 << 15 // vmulwod.d.wu
+       case AVMULWODQVU:
+               return 0xe137 << 15 // vmulwod.q.du
+       case AVMULWEVHBUB:
+               return 0xe140 << 15 // vmulwev.h.bu.b
+       case AVMULWEVWHUH:
+               return 0xe141 << 15 // vmulwev.w.hu.h
+       case AVMULWEVVWUW:
+               return 0xe142 << 15 // vmulwev.d.wu.w
+       case AVMULWEVQVUV:
+               return 0xe143 << 15 // vmulwev.q.du.d
+       case AVMULWODHBUB:
+               return 0xe144 << 15 // vmulwod.h.bu.b
+       case AVMULWODWHUH:
+               return 0xe145 << 15 // vmulwod.w.hu.h
+       case AVMULWODVWUW:
+               return 0xe146 << 15 // vmulwod.d.wu.w
+       case AVMULWODQVUV:
+               return 0xe147 << 15 // vmulwod.q.du.d
+       case AXVMULWEVHB:
+               return 0xe920 << 15 // xvmulwev.h.b
+       case AXVMULWEVWH:
+               return 0xe921 << 15 // xvmulwev.w.h
+       case AXVMULWEVVW:
+               return 0xe922 << 15 // xvmulwev.d.w
+       case AXVMULWEVQV:
+               return 0xe923 << 15 // xvmulwev.q.d
+       case AXVMULWODHB:
+               return 0xe924 << 15 // xvmulwod.h.b
+       case AXVMULWODWH:
+               return 0xe925 << 15 // xvmulwod.w.h
+       case AXVMULWODVW:
+               return 0xe926 << 15 // xvmulwod.d.w
+       case AXVMULWODQV:
+               return 0xe927 << 15 // xvmulwod.q.d
+       case AXVMULWEVHBU:
+               return 0xe930 << 15 // xvmulwev.h.bu
+       case AXVMULWEVWHU:
+               return 0xe931 << 15 // xvmulwev.w.hu
+       case AXVMULWEVVWU:
+               return 0xe932 << 15 // xvmulwev.d.wu
+       case AXVMULWEVQVU:
+               return 0xe933 << 15 // xvmulwev.q.du
+       case AXVMULWODHBU:
+               return 0xe934 << 15 // xvmulwod.h.bu
+       case AXVMULWODWHU:
+               return 0xe935 << 15 // xvmulwod.w.hu
+       case AXVMULWODVWU:
+               return 0xe936 << 15 // xvmulwod.d.wu
+       case AXVMULWODQVU:
+               return 0xe937 << 15 // xvmulwod.q.du
+       case AXVMULWEVHBUB:
+               return 0xe940 << 15 // xvmulwev.h.bu.b
+       case AXVMULWEVWHUH:
+               return 0xe941 << 15 // xvmulwev.w.hu.h
+       case AXVMULWEVVWUW:
+               return 0xe942 << 15 // xvmulwev.d.wu.w
+       case AXVMULWEVQVUV:
+               return 0xe943 << 15 // xvmulwev.q.du.d
+       case AXVMULWODHBUB:
+               return 0xe944 << 15 // xvmulwod.h.bu.b
+       case AXVMULWODWHUH:
+               return 0xe945 << 15 // xvmulwod.w.hu.h
+       case AXVMULWODVWUW:
+               return 0xe946 << 15 // xvmulwod.d.wu.w
+       case AXVMULWODQVUV:
+               return 0xe947 << 15 // xvmulwod.q.du.d
        case AVSLLB:
                return 0xe1d0 << 15 // vsll.b
        case AVSLLH: