From: Joel Sing Date: Sun, 5 Jan 2025 13:30:30 +0000 (+1100) Subject: cmd/internal/obj/riscv: add support for vector fixed-point arithmetic instructions X-Git-Tag: go1.25rc1~570 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=4c1b8ca98c716f0499bb30fd141aa6d4dec7390c;p=gostls13.git cmd/internal/obj/riscv: add support for vector fixed-point arithmetic instructions Add support for vector fixed-point arithmetic instructions to the RISC-V assembler. This includes single width saturating addition and subtraction, averaging addition and subtraction and scaling shift instructions. Change-Id: I9aa27e9565ad016ba5bb2b479e1ba70db24e4ff5 Reviewed-on: https://go-review.googlesource.com/c/go/+/646776 Reviewed-by: Mark Ryan Reviewed-by: Carlos Amedee Reviewed-by: Dmitri Shuralyov LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/asm/internal/asm/testdata/riscv64.s b/src/cmd/asm/internal/asm/testdata/riscv64.s index 74632360e3..1bb52e997d 100644 --- a/src/cmd/asm/internal/asm/testdata/riscv64.s +++ b/src/cmd/asm/internal/asm/testdata/riscv64.s @@ -863,6 +863,80 @@ start: VMVVX X10, V3 // d741055e VMVVI $15, V3 // d7b1075e + // 31.12.1: Vector Single-Width Saturating Add and Subtract + VSADDUVV V1, V2, V3 // d7812082 + VSADDUVV V1, V2, V0, V3 // d7812080 + VSADDUVX X10, V2, V3 // d7412582 + VSADDUVX X10, V2, V0, V3 // d7412580 + VSADDUVI $15, V2, V3 // d7b12782 + VSADDUVI $15, V2, V0, V3 // d7b12780 + VSADDVV V1, V2, V3 // d7812086 + VSADDVV V1, V2, V0, V3 // d7812084 + VSADDVX X10, V2, V3 // d7412586 + VSADDVX X10, V2, V0, V3 // d7412584 + VSADDVI $15, V2, V3 // d7b12786 + VSADDVI $15, V2, V0, V3 // d7b12784 + VSSUBUVV V1, V2, V3 // d781208a + VSSUBUVV V1, V2, V0, V3 // d7812088 + VSSUBUVX X10, V2, V3 // d741258a + VSSUBUVX X10, V2, V0, V3 // d7412588 + VSSUBVV V1, V2, V3 // d781208e + VSSUBVV V1, V2, V0, V3 // d781208c + VSSUBVX X10, V2, V3 // d741258e + VSSUBVX X10, V2, V0, V3 // d741258c + + // 31.12.2: Vector Single-Width Averaging Add and Subtract + VAADDUVV V1, V2, V3 // d7a12022 + VAADDUVV V1, V2, V0, V3 // d7a12020 + VAADDUVX X10, V2, V3 // d7612522 + VAADDUVX X10, V2, V0, V3 // d7612520 + VAADDVV V1, V2, V3 // d7a12026 + VAADDVV V1, V2, V0, V3 // d7a12024 + VAADDVX X10, V2, V3 // d7612526 + VAADDVX X10, V2, V0, V3 // d7612524 + VASUBUVV V1, V2, V3 // d7a1202a + VASUBUVV V1, V2, V0, V3 // d7a12028 + VASUBUVX X10, V2, V3 // d761252a + VASUBUVX X10, V2, V0, V3 // d7612528 + VASUBVV V1, V2, V3 // d7a1202e + VASUBVV V1, V2, V0, V3 // d7a1202c + VASUBVX X10, V2, V3 // d761252e + VASUBVX X10, V2, V0, V3 // d761252c + + // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation + VSMULVV V1, V2, V3 // d781209e + VSMULVV V1, V2, V0, V3 // d781209c + VSMULVX X10, V2, V3 // d741259e + VSMULVX X10, V2, V0, V3 // d741259c + + // 31.12.4: Vector Single-Width Scaling Shift Instructions + VSSRLVV V1, V2, V3 // d78120aa + VSSRLVV V1, V2, V0, V3 // d78120a8 + VSSRLVX X10, V2, V3 // d74125aa + VSSRLVX X10, V2, V0, V3 // d74125a8 + VSSRLVI $15, V2, V3 // d7b127aa + VSSRLVI $15, V2, V0, V3 // d7b127a8 + VSSRAVV V1, V2, V3 // d78120ae + VSSRAVV V1, V2, V0, V3 // d78120ac + VSSRAVX X10, V2, V3 // d74125ae + VSSRAVX X10, V2, V0, V3 // d74125ac + VSSRAVI $16, V2, V3 // d73128ae + VSSRAVI $16, V2, V0, V3 // d73128ac + + // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions + VNCLIPUWV V1, V2, V3 // d78120ba + VNCLIPUWV V1, V2, V0, V3 // d78120b8 + VNCLIPUWX X10, V2, V3 // d74125ba + VNCLIPUWX X10, V2, V0, V3 // d74125b8 + VNCLIPUWI $16, V2, V3 // d73128ba + VNCLIPUWI $16, V2, V0, V3 // d73128b8 + VNCLIPWV V1, V2, V3 // d78120be + VNCLIPWV V1, V2, V0, V3 // d78120bc + VNCLIPWX X10, V2, V3 // d74125be + VNCLIPWX X10, V2, V0, V3 // d74125bc + VNCLIPWI $16, V2, V3 // d73128be + VNCLIPWI $16, V2, V0, V3 // d73128bc + // // Privileged ISA // diff --git a/src/cmd/asm/internal/asm/testdata/riscv64error.s b/src/cmd/asm/internal/asm/testdata/riscv64error.s index 025d63a15c..6a7c9b9444 100644 --- a/src/cmd/asm/internal/asm/testdata/riscv64error.s +++ b/src/cmd/asm/internal/asm/testdata/riscv64error.s @@ -221,5 +221,37 @@ TEXT errors(SB),$0 VMVVV V1, V2, V3 // ERROR "too many operands for instruction" VMVVX X10, V2, V3 // ERROR "too many operands for instruction" VMVVI $15, V2, V3 // ERROR "too many operands for instruction" + VSADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSADDUVI $15, V2, V4, V3 // ERROR "invalid vector mask register" + VSADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSADDVI $15, V2, V4, V3 // ERROR "invalid vector mask register" + VSSUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSSUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSSUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSSUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VAADDUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VAADDUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VAADDVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VAADDVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VASUBUVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VASUBUVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VASUBVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VASUBVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSMULVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSMULVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSSRLVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSSRLVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSSRLVI $15, V2, V4, V3 // ERROR "invalid vector mask register" + VSSRAVV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VSSRAVX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VSSRAVI $15, V2, V4, V3 // ERROR "invalid vector mask register" + VNCLIPUWV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VNCLIPUWX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VNCLIPUWI $16, V2, V4, V3 // ERROR "invalid vector mask register" + VNCLIPWV V1, V2, V4, V3 // ERROR "invalid vector mask register" + VNCLIPWX X10, V2, V4, V3 // ERROR "invalid vector mask register" + VNCLIPWI $16, V2, V4, V3 // ERROR "invalid vector mask register" RET diff --git a/src/cmd/asm/internal/asm/testdata/riscv64validation.s b/src/cmd/asm/internal/asm/testdata/riscv64validation.s index 602cab2c2e..c6f71e64fb 100644 --- a/src/cmd/asm/internal/asm/testdata/riscv64validation.s +++ b/src/cmd/asm/internal/asm/testdata/riscv64validation.s @@ -237,5 +237,37 @@ TEXT validation(SB),$0 VMVVX V1, V2 // ERROR "expected integer register in rs1 position" VMVVI $16, V2 // ERROR "signed immediate 16 must be in range [-16, 15]" VMVVI $-17, V2 // ERROR "signed immediate -17 must be in range [-16, 15]" + VSADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VSADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VSADDUVI $16, V2, V3 // ERROR "signed immediate 16 must be in range [-16, 15]" + VSADDUVI $-17, V2, V3 // ERROR "signed immediate -17 must be in range [-16, 15]" + VSSUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VSSUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VAADDUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VAADDUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VAADDVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VAADDVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VASUBUVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VASUBUVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VASUBVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VASUBVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VSMULVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VSMULVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VSSRLVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VSSRLVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VSSRLVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" + VSSRLVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" + VSSRAVV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VSSRAVX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VSSRAVI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" + VSSRAVI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" + VNCLIPUWV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VNCLIPUWX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VNCLIPUWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" + VNCLIPUWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" + VNCLIPWV X10, V2, V3 // ERROR "expected vector register in vs1 position" + VNCLIPWX V1, V2, V3 // ERROR "expected integer register in rs1 position" + VNCLIPWI $32, V2, V3 // ERROR "signed immediate 32 must be in range [0, 31]" + VNCLIPWI $-1, V2, V3 // ERROR "signed immediate -1 must be in range [0, 31]" RET diff --git a/src/cmd/internal/obj/riscv/obj.go b/src/cmd/internal/obj/riscv/obj.go index 00b71de7cb..25d3b11c97 100644 --- a/src/cmd/internal/obj/riscv/obj.go +++ b/src/cmd/internal/obj/riscv/obj.go @@ -2412,6 +2412,48 @@ var instructions = [ALAST & obj.AMask]instructionData{ AVMVVX & obj.AMask: {enc: rVIVEncoding}, AVMVVI & obj.AMask: {enc: rVViEncoding}, + // 31.12.1: Vector Single-Width Saturating Add and Subtract + AVSADDUVV & obj.AMask: {enc: rVVVEncoding}, + AVSADDUVX & obj.AMask: {enc: rVIVEncoding}, + AVSADDUVI & obj.AMask: {enc: rVViEncoding}, + AVSADDVV & obj.AMask: {enc: rVVVEncoding}, + AVSADDVX & obj.AMask: {enc: rVIVEncoding}, + AVSADDVI & obj.AMask: {enc: rVViEncoding}, + AVSSUBUVV & obj.AMask: {enc: rVVVEncoding}, + AVSSUBUVX & obj.AMask: {enc: rVIVEncoding}, + AVSSUBVV & obj.AMask: {enc: rVVVEncoding}, + AVSSUBVX & obj.AMask: {enc: rVIVEncoding}, + + // 31.12.2: Vector Single-Width Averaging Add and Subtract + AVAADDUVV & obj.AMask: {enc: rVVVEncoding}, + AVAADDUVX & obj.AMask: {enc: rVIVEncoding}, + AVAADDVV & obj.AMask: {enc: rVVVEncoding}, + AVAADDVX & obj.AMask: {enc: rVIVEncoding}, + AVASUBUVV & obj.AMask: {enc: rVVVEncoding}, + AVASUBUVX & obj.AMask: {enc: rVIVEncoding}, + AVASUBVV & obj.AMask: {enc: rVVVEncoding}, + AVASUBVX & obj.AMask: {enc: rVIVEncoding}, + + // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation + AVSMULVV & obj.AMask: {enc: rVVVEncoding}, + AVSMULVX & obj.AMask: {enc: rVIVEncoding}, + + // 31.12.4: Vector Single-Width Scaling Shift Instructions + AVSSRLVV & obj.AMask: {enc: rVVVEncoding}, + AVSSRLVX & obj.AMask: {enc: rVIVEncoding}, + AVSSRLVI & obj.AMask: {enc: rVVuEncoding}, + AVSSRAVV & obj.AMask: {enc: rVVVEncoding}, + AVSSRAVX & obj.AMask: {enc: rVIVEncoding}, + AVSSRAVI & obj.AMask: {enc: rVVuEncoding}, + + // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions + AVNCLIPUWV & obj.AMask: {enc: rVVVEncoding}, + AVNCLIPUWX & obj.AMask: {enc: rVIVEncoding}, + AVNCLIPUWI & obj.AMask: {enc: rVVuEncoding}, + AVNCLIPWV & obj.AMask: {enc: rVVVEncoding}, + AVNCLIPWX & obj.AMask: {enc: rVIVEncoding}, + AVNCLIPWI & obj.AMask: {enc: rVVuEncoding}, + // // Privileged ISA // @@ -3393,10 +3435,13 @@ func instructionsForProg(p *obj.Prog) []*instruction { AVMINUVV, AVMINUVX, AVMINVV, AVMINVX, AVMAXUVV, AVMAXUVX, AVMAXVV, AVMAXVX, AVMULVV, AVMULVX, AVMULHVV, AVMULHVX, AVMULHUVV, AVMULHUVX, AVMULHSUVV, AVMULHSUVX, AVDIVUVV, AVDIVUVX, AVDIVVV, AVDIVVX, AVREMUVV, AVREMUVX, AVREMVV, AVREMVX, - AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, - AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX, + AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX, AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX, - AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX: + AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX, + AVSADDUVV, AVSADDUVX, AVSADDUVI, AVSADDVV, AVSADDVX, AVSADDVI, AVSSUBUVV, AVSSUBUVX, AVSSUBVV, AVSSUBVX, + AVAADDUVV, AVAADDUVX, AVAADDVV, AVAADDVX, AVASUBUVV, AVASUBUVX, AVASUBVV, AVASUBVX, + AVSMULVV, AVSMULVX, AVSSRLVV, AVSSRLVX, AVSSRLVI, AVSSRAVV, AVSSRAVX, AVSSRAVI, + AVNCLIPUWV, AVNCLIPUWX, AVNCLIPUWI, AVNCLIPWV, AVNCLIPWX, AVNCLIPWI: // Set mask bit switch { case ins.rs3 == obj.REG_NONE: