From: Xiaolin Zhao Date: Thu, 19 Dec 2024 13:07:42 +0000 (+0800) Subject: cmd/internal/obj/loong64: add {V,XV}{ADD/SUB}.{B,H,W,D,Q} and {V,XV}{ADD/SUB}.{B... X-Git-Tag: go1.25rc1~821 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=39d7a4973604d816ee0757a46e7525deeb25b37c;p=gostls13.git cmd/internal/obj/loong64: add {V,XV}{ADD/SUB}.{B,H,W,D,Q} and {V,XV}{ADD/SUB}.{B,H,W,D}Uinstructions support Go asm syntax: V{ADD/SUB}{B,H,W,V,Q} VK, VJ, VD XV{ADD/SUB}{B,H,W,V,Q} XK, XJ, XD V{ADD/SUB}{B,H,W,V}U $1, VJ, VD XV{ADD/SUB}{B,H,W,V}U $1, XJ, XD Equivalent platform assembler syntax: v{add/sub}.{b,h,w,d,q} vd, vj, vk xv{add/sub}.{b,h,w,d,q} xd, xj, xk v{add/sub}i.{b,h,w,d}u vd, vj, $1 xv{add/sub}i.{b,h,w,d}u xd, xj, $1 Change-Id: Ia1ef0bc062f4403bb0b1514c2cf1c0264f5d22ee Reviewed-on: https://go-review.googlesource.com/c/go/+/637795 Reviewed-by: Meidan Li Reviewed-by: Junyang Shao Reviewed-by: Michael Pratt Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 3a8b8b8e5a..c3d1908e75 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -679,3 +679,43 @@ lable2: XVROTRV $0, X2, X1 // 4100a176 XVROTRV $63, X2, X1 // 41fca176 XVROTRV $52, X2 // 42d0a176 + + // [X]VADD{B,H,W,V,Q}, [X]VSUB{B,H,W,V,Q} instructions + VADDB V1, V2, V3 // 43040a70 + VADDH V1, V2, V3 // 43840a70 + VADDW V1, V2, V3 // 43040b70 + VADDV V1, V2, V3 // 43840b70 + VADDQ V1, V2, V3 // 43042d71 + VSUBB V1, V2, V3 // 43040c70 + VSUBH V1, V2, V3 // 43840c70 + VSUBW V1, V2, V3 // 43040d70 + VSUBV V1, V2, V3 // 43840d70 + VSUBQ V1, V2, V3 // 43842d71 + XVADDB X3, X2, X1 // 410c0a74 + XVADDH X3, X2, X1 // 418c0a74 + XVADDW X3, X2, X1 // 410c0b74 + XVADDV X3, X2, X1 // 418c0b74 + XVADDQ X3, X2, X1 // 410c2d75 + XVSUBB X3, X2, X1 // 410c0c74 + XVSUBH X3, X2, X1 // 418c0c74 + XVSUBW X3, X2, X1 // 410c0d74 + XVSUBV X3, X2, X1 // 418c0d74 + XVSUBQ X3, X2, X1 // 418c2d75 + + // [X]VADD{B,H,W,V}U, [X]VSUB{B,H,W,V}U instructions + VADDBU $1, V2, V1 // 41048a72 + VADDHU $2, V2, V1 // 41888a72 + VADDWU $3, V2, V1 // 410c8b72 + VADDVU $4, V2, V1 // 41908b72 + VSUBBU $5, V2, V1 // 41148c72 + VSUBHU $6, V2, V1 // 41988c72 + VSUBWU $7, V2, V1 // 411c8d72 + VSUBVU $8, V2, V1 // 41a08d72 + XVADDBU $9, X1, X2 // 22248a76 + XVADDHU $10, X1, X2 // 22a88a76 + XVADDWU $11, X1, X2 // 222c8b76 + XVADDVU $12, X1, X2 // 22b08b76 + XVSUBBU $13, X1, X2 // 22348c76 + XVSUBHU $14, X1, X2 // 22b88c76 + XVSUBWU $15, X1, X2 // 223c8d76 + XVSUBVU $16, X1, X2 // 22c08d76 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 842c800bf5..2bc895b880 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -745,6 +745,44 @@ const ( AVMOVQ AXVMOVQ + // LSX and LASX arithmetic instructions + AVADDB + AVADDH + AVADDW + AVADDV + AVADDQ + AXVADDB + AXVADDH + AXVADDW + AXVADDV + AXVADDQ + AVSUBB + AVSUBH + AVSUBW + AVSUBV + AVSUBQ + AXVSUBB + AXVSUBH + AXVSUBW + AXVSUBV + AXVSUBQ + AVADDBU + AVADDHU + AVADDWU + AVADDVU + AVSUBBU + AVSUBHU + AVSUBWU + AVSUBVU + AXVADDBU + AXVADDHU + AXVADDWU + AXVADDVU + AXVSUBBU + AXVSUBHU + AXVSUBWU + AXVSUBVU + // LSX and LASX Bit-manipulation Instructions AVPCNTB AVPCNTH diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index 82c38dde1a..7201f7d961 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -261,6 +261,42 @@ var Anames = []string{ "FTINTRNEVD", "VMOVQ", "XVMOVQ", + "VADDB", + "VADDH", + "VADDW", + "VADDV", + "VADDQ", + "XVADDB", + "XVADDH", + "XVADDW", + "XVADDV", + "XVADDQ", + "VSUBB", + "VSUBH", + "VSUBW", + "VSUBV", + "VSUBQ", + "XVSUBB", + "XVSUBH", + "XVSUBW", + "XVSUBV", + "XVSUBQ", + "VADDBU", + "VADDHU", + "VADDWU", + "VADDVU", + "VSUBBU", + "VSUBHU", + "VSUBWU", + "VSUBVU", + "XVADDBU", + "XVADDHU", + "XVADDWU", + "XVADDVU", + "XVSUBBU", + "XVSUBHU", + "XVSUBWU", + "XVSUBVU", "VPCNTB", "VPCNTH", "VPCNTW", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 96c0e7b890..3ede022104 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -89,6 +89,11 @@ var optab = []Optab{ {AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, + {AVADDB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AVADDB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, + {AXVADDB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, + {AXVADDB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, + {AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AVSLLB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0}, {AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0}, @@ -1550,6 +1555,28 @@ func buildop(ctxt *obj.Link) { opset(AXVPCNTW, r0) opset(AXVPCNTV, r0) + case AVADDB: + opset(AVADDH, r0) + opset(AVADDW, r0) + opset(AVADDV, r0) + opset(AVADDQ, r0) + opset(AVSUBB, r0) + opset(AVSUBH, r0) + opset(AVSUBW, r0) + opset(AVSUBV, r0) + opset(AVSUBQ, r0) + + case AXVADDB: + opset(AXVADDH, r0) + opset(AXVADDW, r0) + opset(AXVADDV, r0) + opset(AXVADDQ, r0) + opset(AXVSUBB, r0) + opset(AXVSUBH, r0) + opset(AXVSUBW, r0) + opset(AXVSUBV, r0) + opset(AXVSUBQ, r0) + case AVSLLB: opset(AVSRLB, r0) opset(AVSRAB, r0) @@ -1574,11 +1601,27 @@ func buildop(ctxt *obj.Link) { opset(AVSRLW, r0) opset(AVSRAW, r0) opset(AVROTRW, r0) + opset(AVADDBU, r0) + opset(AVADDHU, r0) + opset(AVADDWU, r0) + opset(AVADDVU, r0) + opset(AVSUBBU, r0) + opset(AVSUBHU, r0) + opset(AVSUBWU, r0) + opset(AVSUBVU, r0) case AXVSLLW: opset(AXVSRLW, r0) opset(AXVSRAW, r0) opset(AXVROTRW, r0) + opset(AXVADDBU, r0) + opset(AXVADDHU, r0) + opset(AXVADDWU, r0) + opset(AXVADDVU, r0) + opset(AXVSUBBU, r0) + opset(AXVSUBHU, r0) + opset(AXVSUBWU, r0) + opset(AXVSUBVU, r0) case AVSLLV: opset(AVSRLV, r0) @@ -2779,6 +2822,46 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { return 0xe9de << 15 // xvrotr.w case AXVROTRV: return 0xe9df << 15 // xvrotr.d + case AVADDB: + return 0xe014 << 15 // vadd.b + case AVADDH: + return 0xe015 << 15 // vadd.h + case AVADDW: + return 0xe016 << 15 // vadd.w + case AVADDV: + return 0xe017 << 15 // vadd.d + case AVADDQ: + return 0xe25a << 15 // vadd.q + case AVSUBB: + return 0xe018 << 15 // vsub.b + case AVSUBH: + return 0xe019 << 15 // vsub.h + case AVSUBW: + return 0xe01a << 15 // vsub.w + case AVSUBV: + return 0xe01b << 15 // vsub.d + case AVSUBQ: + return 0xe25b << 15 // vsub.q + case AXVADDB: + return 0xe814 << 15 // xvadd.b + case AXVADDH: + return 0xe815 << 15 // xvadd.h + case AXVADDW: + return 0xe816 << 15 // xvadd.w + case AXVADDV: + return 0xe817 << 15 // xvadd.d + case AXVADDQ: + return 0xea5a << 15 // xvadd.q + case AXVSUBB: + return 0xe818 << 15 // xvsub.b + case AXVSUBH: + return 0xe819 << 15 // xvsub.h + case AXVSUBW: + return 0xe81a << 15 // xvsub.w + case AXVSUBV: + return 0xe81b << 15 // xvsub.d + case AXVSUBQ: + return 0xea5b << 15 // xvsub.q } if a < 0 { @@ -3170,6 +3253,38 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x1dcd<<18 | 0x1<<15 // xvsrai.w case AXVSRAV: return 0x1dcd<<18 | 0x1<<16 // xvsrai.d + case AVADDBU: + return 0xe514 << 15 // vaddi.bu + case AVADDHU: + return 0xe515 << 15 // vaddi.hu + case AVADDWU: + return 0xe516 << 15 // vaddi.wu + case AVADDVU: + return 0xe517 << 15 // vaddi.du + case AVSUBBU: + return 0xe518 << 15 // vsubi.bu + case AVSUBHU: + return 0xe519 << 15 // vsubi.hu + case AVSUBWU: + return 0xe51a << 15 // vsubi.wu + case AVSUBVU: + return 0xe51b << 15 // vsubi.du + case AXVADDBU: + return 0xed14 << 15 // xvaddi.bu + case AXVADDHU: + return 0xed15 << 15 // xvaddi.hu + case AXVADDWU: + return 0xed16 << 15 // xvaddi.wu + case AXVADDVU: + return 0xed17 << 15 // xvaddi.du + case AXVSUBBU: + return 0xed18 << 15 // xvsubi.bu + case AXVSUBHU: + return 0xed19 << 15 // xvsubi.hu + case AXVSUBWU: + return 0xed1a << 15 // xvsubi.wu + case AXVSUBVU: + return 0xed1b << 15 // xvsubi.du } if a < 0 {