From: Guoqi Chen Date: Mon, 24 Nov 2025 12:19:06 +0000 (+0800) Subject: cmd/internal/obj/loong64: add {,x}vmadd series instructions support X-Git-Tag: go1.26rc1~84 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e0a4dffb0c;p=gostls13.git cmd/internal/obj/loong64: add {,x}vmadd series instructions support Go asm syntax: VMADD{B, H, W, V} V1, V2, V3 VMSUB{B, H, W, V} V1, V2, V3 XVMADD{B, H, W, V} X1, X2, X3 XVMSUB{B, H, W, V} X1, X2, X3 VMADDWEV{HB, WH, VW, QV}{,U} V1, V2, V3 VMADDWOD{HB, WH, VW, QV}{,U} V1, V2, V3 XVMADDWEV{HB, WH, VW, QV}{,U} X1, X2, X3 XVMADDWOD{HB, WH, VW, QV}{,U} X1, X2, X3 VMADDWEV{HBUB, WHUH, VWUW, QVUV} V1, V2, V3 VMADDWOD{HBUB, WHUH, VWUW, QVUV} V1, V2, V3 XVMADDWEV{HBUB, WHUH, VWUW, QVUV} X1, X2, X3 XVMADDWOD{HBUB, WHUH, VWUW, QVUV} X1, X2, X3 Equivalent platform assembler syntax: vmadd.{b,h,w,d} v3, v2, v1 vmsub.{b,h,w,d} v3, v2, v1 xvmadd.{b,h,w,d} x3, x2, x1 xvmsub.{b,h,w,d} x3, x2, x1 vmaddwev.{h.b, w.h, d.w, q.d}{,u} v3, v2, v1 vmaddwod.{h.b, w.h, d.w, q.d}{,u} v3, v2, v1 xvmaddwev.{h.b, w.h, d.w, q.d}{,u} x3, x2, x1 xvmaddwod.{h.b, w.h, d.w, q.d}{,u} x3, x2, x1 vmaddwev.{h.bu.b, w.hu.h, d.wu.w, q.du.d} v3, v2, v1 vmaddwod.{h.bu.b, w.hu.h, d.wu.w, q.du.d} v3, v2, v1 xvmaddwev.{h.bu.b, w.hu.h, d.wu.w, q.du.d} x3, x2, x1 xvmaddwod.{h.bu.b, w.hu.h, d.wu.w, q.du.d} x3, x2, x1 Change-Id: I2f4aae51045e1596d4744e525a1589586065cf8e Reviewed-on: https://go-review.googlesource.com/c/go/+/724200 Reviewed-by: Cherry Mui Reviewed-by: Meidan Li Reviewed-by: Dmitri Shuralyov Reviewed-by: sophie zhao LUCI-TryBot-Result: Go LUCI Auto-Submit: abner chenc --- diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 20fd014434..42fa505832 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -1163,6 +1163,78 @@ lable2: XVSUBWODVWU X1, X2, X3 // 43043574 XVSUBWODQVU X1, X2, X3 // 43843574 + // [X]VMADD.{B/H/W/D}, 
[X]VMSUB.{B/H/W/D} instructions + VMADDB V1, V2, V3 // 4304a870 + VMADDH V1, V2, V3 // 4384a870 + VMADDW V1, V2, V3 // 4304a970 + VMADDV V1, V2, V3 // 4384a970 + VMSUBB V1, V2, V3 // 4304aa70 + VMSUBH V1, V2, V3 // 4384aa70 + VMSUBW V1, V2, V3 // 4304ab70 + VMSUBV V1, V2, V3 // 4384ab70 + XVMADDB X1, X2, X3 // 4304a874 + XVMADDH X1, X2, X3 // 4384a874 + XVMADDW X1, X2, X3 // 4304a974 + XVMADDV X1, X2, X3 // 4384a974 + XVMSUBB X1, X2, X3 // 4304aa74 + XVMSUBH X1, X2, X3 // 4384aa74 + XVMSUBW X1, X2, X3 // 4304ab74 + XVMSUBV X1, X2, X3 // 4384ab74 + + // [X]VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D} instructions + VMADDWEVHB V1, V2, V3 // 4304ac70 + VMADDWEVWH V1, V2, V3 // 4384ac70 + VMADDWEVVW V1, V2, V3 // 4304ad70 + VMADDWEVQV V1, V2, V3 // 4384ad70 + VMADDWODHB V1, V2, V3 // 4304ae70 + VMADDWODWH V1, V2, V3 // 4384ae70 + VMADDWODVW V1, V2, V3 // 4304af70 + VMADDWODQV V1, V2, V3 // 4384af70 + XVMADDWEVHB X1, X2, X3 // 4304ac74 + XVMADDWEVWH X1, X2, X3 // 4384ac74 + XVMADDWEVVW X1, X2, X3 // 4304ad74 + XVMADDWEVQV X1, X2, X3 // 4384ad74 + XVMADDWODHB X1, X2, X3 // 4304ae74 + XVMADDWODWH X1, X2, X3 // 4384ae74 + XVMADDWODVW X1, X2, X3 // 4304af74 + XVMADDWODQV X1, X2, X3 // 4384af74 + + // [X]VMADDW{EV/OD}.{H.B/W.H/D.W/Q.D}U instructions + VMADDWEVHBU V1, V2, V3 // 4304b470 + VMADDWEVWHU V1, V2, V3 // 4384b470 + VMADDWEVVWU V1, V2, V3 // 4304b570 + VMADDWEVQVU V1, V2, V3 // 4384b570 + VMADDWODHBU V1, V2, V3 // 4304b670 + VMADDWODWHU V1, V2, V3 // 4384b670 + VMADDWODVWU V1, V2, V3 // 4304b770 + VMADDWODQVU V1, V2, V3 // 4384b770 + XVMADDWEVHBU X1, X2, X3 // 4304b474 + XVMADDWEVWHU X1, X2, X3 // 4384b474 + XVMADDWEVVWU X1, X2, X3 // 4304b574 + XVMADDWEVQVU X1, X2, X3 // 4384b574 + XVMADDWODHBU X1, X2, X3 // 4304b674 + XVMADDWODWHU X1, X2, X3 // 4384b674 + XVMADDWODVWU X1, X2, X3 // 4304b774 + XVMADDWODQVU X1, X2, X3 // 4384b774 + + // [X]VMADDW{EV/OD}.{H.BU.B/W.HU.H/D.WU.W/Q.DU.D} instructions + VMADDWEVHBUB V1, V2, V3 // 4304bc70 + VMADDWEVWHUH V1, V2, V3 // 4384bc70 + 
VMADDWEVVWUW V1, V2, V3 // 4304bd70 + VMADDWEVQVUV V1, V2, V3 // 4384bd70 + VMADDWODHBUB V1, V2, V3 // 4304be70 + VMADDWODWHUH V1, V2, V3 // 4384be70 + VMADDWODVWUW V1, V2, V3 // 4304bf70 + VMADDWODQVUV V1, V2, V3 // 4384bf70 + XVMADDWEVHBUB X1, X2, X3 // 4304bc74 + XVMADDWEVWHUH X1, X2, X3 // 4384bc74 + XVMADDWEVVWUW X1, X2, X3 // 4304bd74 + XVMADDWEVQVUV X1, X2, X3 // 4384bd74 + XVMADDWODHBUB X1, X2, X3 // 4304be74 + XVMADDWODWHUH X1, X2, X3 // 4384be74 + XVMADDWODVWUW X1, X2, X3 // 4304bf74 + XVMADDWODQVUV X1, X2, X3 // 4384bf74 + // [X]VSHUF4I.{B/H/W/D} instructions VSHUF4IB $0, V2, V1 // 41009073 VSHUF4IB $16, V2, V1 // 41409073 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 2458fb2e8e..38d4b74959 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -1227,6 +1227,78 @@ const ( AXVSUBWODVWU AXVSUBWODQVU + AVMADDB + AVMADDH + AVMADDW + AVMADDV + AVMSUBB + AVMSUBH + AVMSUBW + AVMSUBV + + AXVMADDB + AXVMADDH + AXVMADDW + AXVMADDV + AXVMSUBB + AXVMSUBH + AXVMSUBW + AXVMSUBV + + AVMADDWEVHB + AVMADDWEVWH + AVMADDWEVVW + AVMADDWEVQV + AVMADDWODHB + AVMADDWODWH + AVMADDWODVW + AVMADDWODQV + + AVMADDWEVHBU + AVMADDWEVWHU + AVMADDWEVVWU + AVMADDWEVQVU + AVMADDWODHBU + AVMADDWODWHU + AVMADDWODVWU + AVMADDWODQVU + + AVMADDWEVHBUB + AVMADDWEVWHUH + AVMADDWEVVWUW + AVMADDWEVQVUV + AVMADDWODHBUB + AVMADDWODWHUH + AVMADDWODVWUW + AVMADDWODQVUV + + AXVMADDWEVHB + AXVMADDWEVWH + AXVMADDWEVVW + AXVMADDWEVQV + AXVMADDWODHB + AXVMADDWODWH + AXVMADDWODVW + AXVMADDWODQV + + AXVMADDWEVHBU + AXVMADDWEVWHU + AXVMADDWEVVWU + AXVMADDWEVQVU + AXVMADDWODHBU + AXVMADDWODWHU + AXVMADDWODVWU + AXVMADDWODQVU + + AXVMADDWEVHBUB + AXVMADDWEVWHUH + AXVMADDWEVVWUW + AXVMADDWEVQVUV + AXVMADDWODHBUB + AXVMADDWODWHUH + AXVMADDWODVWUW + AXVMADDWODQVUV + AVSHUF4IB AVSHUF4IH AVSHUF4IW diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index 18f818ceba..b1fcbce196 
100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -695,6 +695,70 @@ var Anames = []string{ "XVSUBWODWHU", "XVSUBWODVWU", "XVSUBWODQVU", + "VMADDB", + "VMADDH", + "VMADDW", + "VMADDV", + "VMSUBB", + "VMSUBH", + "VMSUBW", + "VMSUBV", + "XVMADDB", + "XVMADDH", + "XVMADDW", + "XVMADDV", + "XVMSUBB", + "XVMSUBH", + "XVMSUBW", + "XVMSUBV", + "VMADDWEVHB", + "VMADDWEVWH", + "VMADDWEVVW", + "VMADDWEVQV", + "VMADDWODHB", + "VMADDWODWH", + "VMADDWODVW", + "VMADDWODQV", + "VMADDWEVHBU", + "VMADDWEVWHU", + "VMADDWEVVWU", + "VMADDWEVQVU", + "VMADDWODHBU", + "VMADDWODWHU", + "VMADDWODVWU", + "VMADDWODQVU", + "VMADDWEVHBUB", + "VMADDWEVWHUH", + "VMADDWEVVWUW", + "VMADDWEVQVUV", + "VMADDWODHBUB", + "VMADDWODWHUH", + "VMADDWODVWUW", + "VMADDWODQVUV", + "XVMADDWEVHB", + "XVMADDWEVWH", + "XVMADDWEVVW", + "XVMADDWEVQV", + "XVMADDWODHB", + "XVMADDWODWH", + "XVMADDWODVW", + "XVMADDWODQV", + "XVMADDWEVHBU", + "XVMADDWEVWHU", + "XVMADDWEVVWU", + "XVMADDWEVQVU", + "XVMADDWODHBU", + "XVMADDWODWHU", + "XVMADDWODVWU", + "XVMADDWODQVU", + "XVMADDWEVHBUB", + "XVMADDWEVWHUH", + "XVMADDWEVVWUW", + "XVMADDWEVQVUV", + "XVMADDWODHBUB", + "XVMADDWODWHUH", + "XVMADDWODVWUW", + "XVMADDWODQVUV", "VSHUF4IB", "VSHUF4IH", "VSHUF4IW", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 6a23460098..e5f2014e95 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -1830,6 +1830,38 @@ func buildop(ctxt *obj.Link) { opset(AVSUBWODWHU, r0) opset(AVSUBWODVWU, r0) opset(AVSUBWODQVU, r0) + opset(AVMADDB, r0) + opset(AVMADDH, r0) + opset(AVMADDW, r0) + opset(AVMADDV, r0) + opset(AVMSUBB, r0) + opset(AVMSUBH, r0) + opset(AVMSUBW, r0) + opset(AVMSUBV, r0) + opset(AVMADDWEVHB, r0) + opset(AVMADDWEVWH, r0) + opset(AVMADDWEVVW, r0) + opset(AVMADDWEVQV, r0) + opset(AVMADDWODHB, r0) + opset(AVMADDWODWH, r0) + opset(AVMADDWODVW, r0) + opset(AVMADDWODQV, r0) + opset(AVMADDWEVHBU, r0) + 
opset(AVMADDWEVWHU, r0) + opset(AVMADDWEVVWU, r0) + opset(AVMADDWEVQVU, r0) + opset(AVMADDWODHBU, r0) + opset(AVMADDWODWHU, r0) + opset(AVMADDWODVWU, r0) + opset(AVMADDWODQVU, r0) + opset(AVMADDWEVHBUB, r0) + opset(AVMADDWEVWHUH, r0) + opset(AVMADDWEVVWUW, r0) + opset(AVMADDWEVQVUV, r0) + opset(AVMADDWODHBUB, r0) + opset(AVMADDWODWHUH, r0) + opset(AVMADDWODVWUW, r0) + opset(AVMADDWODQVUV, r0) case AXVSLTB: opset(AXVSLTH, r0) @@ -1871,6 +1903,38 @@ func buildop(ctxt *obj.Link) { opset(AXVSUBWODWHU, r0) opset(AXVSUBWODVWU, r0) opset(AXVSUBWODQVU, r0) + opset(AXVMADDB, r0) + opset(AXVMADDH, r0) + opset(AXVMADDW, r0) + opset(AXVMADDV, r0) + opset(AXVMSUBB, r0) + opset(AXVMSUBH, r0) + opset(AXVMSUBW, r0) + opset(AXVMSUBV, r0) + opset(AXVMADDWEVHB, r0) + opset(AXVMADDWEVWH, r0) + opset(AXVMADDWEVVW, r0) + opset(AXVMADDWEVQV, r0) + opset(AXVMADDWODHB, r0) + opset(AXVMADDWODWH, r0) + opset(AXVMADDWODVW, r0) + opset(AXVMADDWODQV, r0) + opset(AXVMADDWEVHBU, r0) + opset(AXVMADDWEVWHU, r0) + opset(AXVMADDWEVVWU, r0) + opset(AXVMADDWEVQVU, r0) + opset(AXVMADDWODHBU, r0) + opset(AXVMADDWODWHU, r0) + opset(AXVMADDWODVWU, r0) + opset(AXVMADDWODQVU, r0) + opset(AXVMADDWEVHBUB, r0) + opset(AXVMADDWEVWHUH, r0) + opset(AXVMADDWEVVWUW, r0) + opset(AXVMADDWEVQVUV, r0) + opset(AXVMADDWODHBUB, r0) + opset(AXVMADDWODWHUH, r0) + opset(AXVMADDWODVWUW, r0) + opset(AXVMADDWODQVUV, r0) case AVANDB: opset(AVORB, r0) @@ -3811,6 +3875,134 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { return 0x0E86A << 15 // xvsubwod.d.wu case AXVSUBWODQVU: return 0x0E86B << 15 // xvsubwod.q.du + case AVMADDB: + return 0x0E150 << 15 // vmadd.b + case AVMADDH: + return 0x0E151 << 15 // vmadd.h + case AVMADDW: + return 0x0E152 << 15 // vmadd.w + case AVMADDV: + return 0x0E153 << 15 // vmadd.d + case AVMSUBB: + return 0x0E154 << 15 // vmsub.b + case AVMSUBH: + return 0x0E155 << 15 // vmsub.h + case AVMSUBW: + return 0x0E156 << 15 // vmsub.w + case AVMSUBV: + return 0x0E157 << 15 // vmsub.d + case AXVMADDB: + return 
0x0E950 << 15 // xvmadd.b + case AXVMADDH: + return 0x0E951 << 15 // xvmadd.h + case AXVMADDW: + return 0x0E952 << 15 // xvmadd.w + case AXVMADDV: + return 0x0E953 << 15 // xvmadd.d + case AXVMSUBB: + return 0x0E954 << 15 // xvmsub.b + case AXVMSUBH: + return 0x0E955 << 15 // xvmsub.h + case AXVMSUBW: + return 0x0E956 << 15 // xvmsub.w + case AXVMSUBV: + return 0x0E957 << 15 // xvmsub.d + case AVMADDWEVHB: + return 0x0E158 << 15 // vmaddwev.h.b + case AVMADDWEVWH: + return 0x0E159 << 15 // vmaddwev.w.h + case AVMADDWEVVW: + return 0x0E15A << 15 // vmaddwev.d.w + case AVMADDWEVQV: + return 0x0E15B << 15 // vmaddwev.q.d + case AVMADDWODHB: + return 0x0E15C << 15 // vmaddwod.h.b + case AVMADDWODWH: + return 0x0E15D << 15 // vmaddwod.w.h + case AVMADDWODVW: + return 0x0E15E << 15 // vmaddwod.d.w + case AVMADDWODQV: + return 0x0E15F << 15 // vmaddwod.q.d + case AVMADDWEVHBU: + return 0x0E168 << 15 // vmaddwev.h.bu + case AVMADDWEVWHU: + return 0x0E169 << 15 // vmaddwev.w.hu + case AVMADDWEVVWU: + return 0x0E16A << 15 // vmaddwev.d.wu + case AVMADDWEVQVU: + return 0x0E16B << 15 // vmaddwev.q.du + case AVMADDWODHBU: + return 0x0E16C << 15 // vmaddwod.h.bu + case AVMADDWODWHU: + return 0x0E16D << 15 // vmaddwod.w.hu + case AVMADDWODVWU: + return 0x0E16E << 15 // vmaddwod.d.wu + case AVMADDWODQVU: + return 0x0E16F << 15 // vmaddwod.q.du + case AVMADDWEVHBUB: + return 0x0E178 << 15 // vmaddwev.h.bu.b + case AVMADDWEVWHUH: + return 0x0E179 << 15 // vmaddwev.w.hu.h + case AVMADDWEVVWUW: + return 0x0E17A << 15 // vmaddwev.d.wu.w + case AVMADDWEVQVUV: + return 0x0E17B << 15 // vmaddwev.q.du.d + case AVMADDWODHBUB: + return 0x0E17C << 15 // vmaddwod.h.bu.b + case AVMADDWODWHUH: + return 0x0E17D << 15 // vmaddwod.w.hu.h + case AVMADDWODVWUW: + return 0x0E17E << 15 // vmaddwod.d.wu.w + case AVMADDWODQVUV: + return 0x0E17F << 15 // vmaddwod.q.du.d + case AXVMADDWEVHB: + return 0x0E958 << 15 // xvmaddwev.h.b + case AXVMADDWEVWH: + return 0x0E959 << 15 // xvmaddwev.w.h + case 
AXVMADDWEVVW: + return 0x0E95A << 15 // xvmaddwev.d.w + case AXVMADDWEVQV: + return 0x0E95B << 15 // xvmaddwev.q.d + case AXVMADDWODHB: + return 0x0E95C << 15 // xvmaddwod.h.b + case AXVMADDWODWH: + return 0x0E95D << 15 // xvmaddwod.w.h + case AXVMADDWODVW: + return 0x0E95E << 15 // xvmaddwod.d.w + case AXVMADDWODQV: + return 0x0E95F << 15 // xvmaddwod.q.d + case AXVMADDWEVHBU: + return 0x0E968 << 15 // xvmaddwev.h.bu + case AXVMADDWEVWHU: + return 0x0E969 << 15 // xvmaddwev.w.hu + case AXVMADDWEVVWU: + return 0x0E96A << 15 // xvmaddwev.d.wu + case AXVMADDWEVQVU: + return 0x0E96B << 15 // xvmaddwev.q.du + case AXVMADDWODHBU: + return 0x0E96C << 15 // xvmaddwod.h.bu + case AXVMADDWODWHU: + return 0x0E96D << 15 // xvmaddwod.w.hu + case AXVMADDWODVWU: + return 0x0E96E << 15 // xvmaddwod.d.wu + case AXVMADDWODQVU: + return 0x0E96F << 15 // xvmaddwod.q.du + case AXVMADDWEVHBUB: + return 0x0E978 << 15 // xvmaddwev.h.bu.b + case AXVMADDWEVWHUH: + return 0x0E979 << 15 // xvmaddwev.w.hu.h + case AXVMADDWEVVWUW: + return 0x0E97A << 15 // xvmaddwev.d.wu.w + case AXVMADDWEVQVUV: + return 0x0E97B << 15 // xvmaddwev.q.du.d + case AXVMADDWODHBUB: + return 0x0E97C << 15 // xvmaddwod.h.bu.b + case AXVMADDWODWHUH: + return 0x0E97D << 15 // xvmaddwod.w.hu.h + case AXVMADDWODVWUW: + return 0x0E97E << 15 // xvmaddwod.d.wu.w + case AXVMADDWODQVUV: + return 0x0E97F << 15 // xvmaddwod.q.du.d case AVSLLB: return 0xe1d0 << 15 // vsll.b case AVSLLH: