From 8111104a2120e14ef068b9cfbda91965473ab345 Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Fri, 31 Oct 2025 17:45:39 +0800 Subject: [PATCH] cmd/internal/obj/loong64: add {,X}VSHUF.{B/H/W/V} instructions support Go asm syntax: VSHUFB V1, V2, V3, V4 XVSHUFB X1, X2, X3, X4 VSHUF{H/W/V} V1, V2, V3 XVSHUF{H/W/V} X1, X2, X3 Equivalent platform assembler syntax: vshuf.b v4, v3, v2, v1 xvshuf.b x4, x3, x2, x1 vshuf.{h/w/d} v3, v2, v1 xvshuf.{h/w/d} x3, x2, x1 Change-Id: I8983467495f587cf46083fd81cb024400c7dc2a7 Reviewed-on: https://go-review.googlesource.com/c/go/+/716804 Reviewed-by: Meidan Li Reviewed-by: Michael Knyszek Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: sophie zhao --- .../asm/internal/asm/testdata/loong64enc1.s | 10 +++++++ src/cmd/internal/obj/loong64/a.out.go | 9 +++++++ src/cmd/internal/obj/loong64/anames.go | 8 ++++++ src/cmd/internal/obj/loong64/asm.go | 26 +++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index b440a6456f..2eb3156e45 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -1023,6 +1023,16 @@ lable2: XVSHUF4IV $8, X1, X2 // 22209c77 XVSHUF4IV $15, X1, X2 // 223c9c77 + // [X]VSHUF.{B/H/W/V} instructions + VSHUFH V1, V2, V3 // 43847a71 + VSHUFW V1, V2, V3 // 43047b71 + VSHUFV V1, V2, V3 // 43847b71 + XVSHUFH X1, X2, X3 // 43847a75 + XVSHUFW X1, X2, X3 // 43047b75 + XVSHUFV X1, X2, X3 // 43847b75 + VSHUFB V1, V2, V3, V4 // 6488500d + XVSHUFB X1, X2, X3, X4 // 6488600d + // VPERMIW, XVPERMI{W,V,Q} instructions VPERMIW $0x1B, V1, V2 // VPERMIW $27, V1, V2 // 226ce473 XVPERMIW $0x2B, X1, X2 // XVPERMIW $43, X1, X2 // 22ace477 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 48e60b64bf..6bf2068334 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -1115,6 +1115,15 @@ const ( AXVSHUF4IW AXVSHUF4IV + AVSHUFB + AVSHUFH + AVSHUFW + AVSHUFV + AXVSHUFB + AXVSHUFH + AXVSHUFW + AXVSHUFV + AVPERMIW AXVPERMIW AXVPERMIV diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index 95806741e6..0e00cb887f 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -586,6 +586,14 @@ var Anames = []string{ "XVSHUF4IH", "XVSHUF4IW", "XVSHUF4IV", + "VSHUFB", + "VSHUFH", + "VSHUFW", + "VSHUFV", + "XVSHUFB", + "XVSHUFH", + "XVSHUFW", + "XVSHUFV", "VPERMIW", "XVPERMIW", "XVPERMIV", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 8bf8a1d990..e0aca15005 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -155,6 +155,8 @@ var optab = []Optab{ {AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 37, 4, 0, 0}, {AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 37, 4, 0, 0}, + {AVSHUFB, C_VREG, C_VREG, C_VREG, C_VREG, C_NONE, 37, 4, 0, 0}, + {AXVSHUFB, C_XREG, C_XREG, C_XREG, C_XREG, C_NONE, 37, 4, 0, 0}, {AFSEL, C_FCCREG, C_FREG, C_FREG, C_FREG, C_NONE, 33, 4, 0, 0}, {AFSEL, C_FCCREG, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0}, @@ -1561,6 +1563,8 @@ func buildop(ctxt *obj.Link) { AMOVWU, AVMOVQ, AXVMOVQ, + AVSHUFB, + AXVSHUFB, ANEGW, ANEGV, AWORD, @@ -1698,6 +1702,9 @@ func buildop(ctxt *obj.Link) { opset(AVMULD, r0) opset(AVDIVF, r0) opset(AVDIVD, r0) + opset(AVSHUFH, r0) + opset(AVSHUFW, r0) + opset(AVSHUFV, r0) case AXVSEQB: opset(AXVSEQH, r0) @@ -1771,6 +1778,9 @@ func buildop(ctxt *obj.Link) { opset(AXVMULD, r0) opset(AXVDIVF, r0) opset(AXVDIVD, r0) + opset(AXVSHUFH, r0) + opset(AXVSHUFW, r0) + opset(AXVSHUFV, r0) case AVANDB: opset(AVORB, r0) @@ -3107,6 +3117,10 @@ func (c *ctxt0) oprrrr(a obj.As) uint32 { return 0x8d << 20 // fnmsub.s case AFNMSUBD: return 0x8e << 20 // fnmsub.d + case AVSHUFB: + return 0x0D5 << 20 // vshuf.b + case AXVSHUFB: + return 0x0D6 << 20 // xvshuf.b } c.ctxt.Diag("bad rrrr opcode %v", a) @@ -3775,6 +3789,18 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { return 0xea22 << 15 // xvbitrev.w case AXVBITREVV: return 0xea23 << 15 // xvbitrev.d + case AVSHUFH: + return 0x0E2F5 << 15 // vshuf.h + case AVSHUFW: + return 0x0E2F6 << 15 // vshuf.w + case AVSHUFV: + return 0x0E2F7 << 15 // vshuf.d + case AXVSHUFH: + return 0x0EAF5 << 15 // xvshuf.h + case AXVSHUFW: + return 0x0EAF6 << 15 // xvshuf.w + case AXVSHUFV: + return 0x0EAF7 << 15 // xvshuf.d } if a < 0 { -- 2.52.0