]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/riscv: add support for vector fixed-point arithmetic instructions
authorJoel Sing <joel@sing.id.au>
Sun, 5 Jan 2025 13:30:30 +0000 (00:30 +1100)
committerJoel Sing <joel@sing.id.au>
Tue, 1 Apr 2025 12:31:22 +0000 (05:31 -0700)
Add support for vector fixed-point arithmetic instructions to the
RISC-V assembler. This includes single width saturating addition
and subtraction, averaging addition and subtraction and scaling
shift instructions.

Change-Id: I9aa27e9565ad016ba5bb2b479e1ba70db24e4ff5
Reviewed-on: https://go-review.googlesource.com/c/go/+/646776
Reviewed-by: Mark Ryan <markdryan@rivosinc.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/asm/internal/asm/testdata/riscv64.s
src/cmd/asm/internal/asm/testdata/riscv64error.s
src/cmd/asm/internal/asm/testdata/riscv64validation.s
src/cmd/internal/obj/riscv/obj.go

index 74632360e3b9bfa8ff8392849d14a4a4f4416b90..1bb52e997dc22cab6c803c2595e4d53b7da0d47b 100644 (file)
@@ -863,6 +863,80 @@ start:
        VMVVX           X10, V3                         // d741055e
        VMVVI           $15, V3                         // d7b1075e
 
+       // 31.12.1: Vector Single-Width Saturating Add and Subtract
+       VSADDUVV        V1, V2, V3                      // d7812082
+       VSADDUVV        V1, V2, V0, V3                  // d7812080
+       VSADDUVX        X10, V2, V3                     // d7412582
+       VSADDUVX        X10, V2, V0, V3                 // d7412580
+       VSADDUVI        $15, V2, V3                     // d7b12782
+       VSADDUVI        $15, V2, V0, V3                 // d7b12780
+       VSADDVV         V1, V2, V3                      // d7812086
+       VSADDVV         V1, V2, V0, V3                  // d7812084
+       VSADDVX         X10, V2, V3                     // d7412586
+       VSADDVX         X10, V2, V0, V3                 // d7412584
+       VSADDVI         $15, V2, V3                     // d7b12786
+       VSADDVI         $15, V2, V0, V3                 // d7b12784
+       VSSUBUVV        V1, V2, V3                      // d781208a
+       VSSUBUVV        V1, V2, V0, V3                  // d7812088
+       VSSUBUVX        X10, V2, V3                     // d741258a
+       VSSUBUVX        X10, V2, V0, V3                 // d7412588
+       VSSUBVV         V1, V2, V3                      // d781208e
+       VSSUBVV         V1, V2, V0, V3                  // d781208c
+       VSSUBVX         X10, V2, V3                     // d741258e
+       VSSUBVX         X10, V2, V0, V3                 // d741258c
+
+       // 31.12.2: Vector Single-Width Averaging Add and Subtract
+       VAADDUVV        V1, V2, V3                      // d7a12022
+       VAADDUVV        V1, V2, V0, V3                  // d7a12020
+       VAADDUVX        X10, V2, V3                     // d7612522
+       VAADDUVX        X10, V2, V0, V3                 // d7612520
+       VAADDVV         V1, V2, V3                      // d7a12026
+       VAADDVV         V1, V2, V0, V3                  // d7a12024
+       VAADDVX         X10, V2, V3                     // d7612526
+       VAADDVX         X10, V2, V0, V3                 // d7612524
+       VASUBUVV        V1, V2, V3                      // d7a1202a
+       VASUBUVV        V1, V2, V0, V3                  // d7a12028
+       VASUBUVX        X10, V2, V3                     // d761252a
+       VASUBUVX        X10, V2, V0, V3                 // d7612528
+       VASUBVV         V1, V2, V3                      // d7a1202e
+       VASUBVV         V1, V2, V0, V3                  // d7a1202c
+       VASUBVX         X10, V2, V3                     // d761252e
+       VASUBVX         X10, V2, V0, V3                 // d761252c
+
+       // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation
+       VSMULVV         V1, V2, V3                      // d781209e
+       VSMULVV         V1, V2, V0, V3                  // d781209c
+       VSMULVX         X10, V2, V3                     // d741259e
+       VSMULVX         X10, V2, V0, V3                 // d741259c
+
+       // 31.12.4: Vector Single-Width Scaling Shift Instructions
+       VSSRLVV         V1, V2, V3                      // d78120aa
+       VSSRLVV         V1, V2, V0, V3                  // d78120a8
+       VSSRLVX         X10, V2, V3                     // d74125aa
+       VSSRLVX         X10, V2, V0, V3                 // d74125a8
+       VSSRLVI         $15, V2, V3                     // d7b127aa
+       VSSRLVI         $15, V2, V0, V3                 // d7b127a8
+       VSSRAVV         V1, V2, V3                      // d78120ae
+       VSSRAVV         V1, V2, V0, V3                  // d78120ac
+       VSSRAVX         X10, V2, V3                     // d74125ae
+       VSSRAVX         X10, V2, V0, V3                 // d74125ac
+       VSSRAVI         $16, V2, V3                     // d73128ae
+       VSSRAVI         $16, V2, V0, V3                 // d73128ac
+
+       // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions
+       VNCLIPUWV       V1, V2, V3                      // d78120ba
+       VNCLIPUWV       V1, V2, V0, V3                  // d78120b8
+       VNCLIPUWX       X10, V2, V3                     // d74125ba
+       VNCLIPUWX       X10, V2, V0, V3                 // d74125b8
+       VNCLIPUWI       $16, V2, V3                     // d73128ba
+       VNCLIPUWI       $16, V2, V0, V3                 // d73128b8
+       VNCLIPWV        V1, V2, V3                      // d78120be
+       VNCLIPWV        V1, V2, V0, V3                  // d78120bc
+       VNCLIPWX        X10, V2, V3                     // d74125be
+       VNCLIPWX        X10, V2, V0, V3                 // d74125bc
+       VNCLIPWI        $16, V2, V3                     // d73128be
+       VNCLIPWI        $16, V2, V0, V3                 // d73128bc
+
        //
        // Privileged ISA
        //
index 025d63a15cd9b4c79d461a2d16dc813e64572734..6a7c9b944454f8dbaeb901a874425fc2177ac449 100644 (file)
@@ -221,5 +221,37 @@ TEXT errors(SB),$0
        VMVVV   V1, V2, V3                      // ERROR "too many operands for instruction"
        VMVVX   X10, V2, V3                     // ERROR "too many operands for instruction"
        VMVVI   $15, V2, V3                     // ERROR "too many operands for instruction"
+       VSADDUVV V1, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSADDUVX X10, V2, V4, V3                // ERROR "invalid vector mask register"
+       VSADDUVI $15, V2, V4, V3                // ERROR "invalid vector mask register"
+       VSADDVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VSADDVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSADDVI $15, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSSUBUVV V1, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSSUBUVX X10, V2, V4, V3                // ERROR "invalid vector mask register"
+       VSSUBVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VSSUBVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VAADDUVV V1, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VAADDUVX X10, V2, V4, V3                // ERROR "invalid vector mask register"
+       VAADDVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VAADDVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VASUBUVV V1, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VASUBUVX X10, V2, V4, V3                // ERROR "invalid vector mask register"
+       VASUBVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VASUBVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSMULVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VSMULVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSSRLVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VSSRLVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSSRLVI $15, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSSRAVV V1, V2, V4, V3                  // ERROR "invalid vector mask register"
+       VSSRAVX X10, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VSSRAVI $15, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VNCLIPUWV V1, V2, V4, V3                // ERROR "invalid vector mask register"
+       VNCLIPUWX X10, V2, V4, V3               // ERROR "invalid vector mask register"
+       VNCLIPUWI $16, V2, V4, V3               // ERROR "invalid vector mask register"
+       VNCLIPWV V1, V2, V4, V3                 // ERROR "invalid vector mask register"
+       VNCLIPWX X10, V2, V4, V3                // ERROR "invalid vector mask register"
+       VNCLIPWI $16, V2, V4, V3                // ERROR "invalid vector mask register"
 
        RET
index 602cab2c2ea064ede7dcd16e0c861dfb5a8e5731..c6f71e64fb4c596f2edff336568e8b75ada62033 100644 (file)
@@ -237,5 +237,37 @@ TEXT validation(SB),$0
        VMVVX   V1, V2                          // ERROR "expected integer register in rs1 position"
        VMVVI   $16, V2                         // ERROR "signed immediate 16 must be in range [-16, 15]"
        VMVVI   $-17, V2                        // ERROR "signed immediate -17 must be in range [-16, 15]"
+       VSADDUVV X10, V2, V3                    // ERROR "expected vector register in vs1 position"
+       VSADDUVX V1, V2, V3                     // ERROR "expected integer register in rs1 position"
+       VSADDUVI $16, V2, V3                    // ERROR "signed immediate 16 must be in range [-16, 15]"
+       VSADDUVI $-17, V2, V3                   // ERROR "signed immediate -17 must be in range [-16, 15]"
+       VSSUBUVV X10, V2, V3                    // ERROR "expected vector register in vs1 position"
+       VSSUBUVX V1, V2, V3                     // ERROR "expected integer register in rs1 position"
+       VAADDUVV X10, V2, V3                    // ERROR "expected vector register in vs1 position"
+       VAADDUVX V1, V2, V3                     // ERROR "expected integer register in rs1 position"
+       VAADDVV X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VAADDVX V1, V2, V3                      // ERROR "expected integer register in rs1 position"
+       VASUBUVV X10, V2, V3                    // ERROR "expected vector register in vs1 position"
+       VASUBUVX V1, V2, V3                     // ERROR "expected integer register in rs1 position"
+       VASUBVV X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VASUBVX V1, V2, V3                      // ERROR "expected integer register in rs1 position"
+       VSMULVV X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VSMULVX V1, V2, V3                      // ERROR "expected integer register in rs1 position"
+       VSSRLVV X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VSSRLVX V1, V2, V3                      // ERROR "expected integer register in rs1 position"
+       VSSRLVI $32, V2, V3                     // ERROR "signed immediate 32 must be in range [0, 31]"
+       VSSRLVI $-1, V2, V3                     // ERROR "signed immediate -1 must be in range [0, 31]"
+       VSSRAVV X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VSSRAVX V1, V2, V3                      // ERROR "expected integer register in rs1 position"
+       VSSRAVI $32, V2, V3                     // ERROR "signed immediate 32 must be in range [0, 31]"
+       VSSRAVI $-1, V2, V3                     // ERROR "signed immediate -1 must be in range [0, 31]"
+       VNCLIPUWV X10, V2, V3                   // ERROR "expected vector register in vs1 position"
+       VNCLIPUWX V1, V2, V3                    // ERROR "expected integer register in rs1 position"
+       VNCLIPUWI $32, V2, V3                   // ERROR "signed immediate 32 must be in range [0, 31]"
+       VNCLIPUWI $-1, V2, V3                   // ERROR "signed immediate -1 must be in range [0, 31]"
+       VNCLIPWV X10, V2, V3                    // ERROR "expected vector register in vs1 position"
+       VNCLIPWX V1, V2, V3                     // ERROR "expected integer register in rs1 position"
+       VNCLIPWI $32, V2, V3                    // ERROR "signed immediate 32 must be in range [0, 31]"
+       VNCLIPWI $-1, V2, V3                    // ERROR "signed immediate -1 must be in range [0, 31]"
 
        RET
index 00b71de7cb0e3a4f8c46089ad34f5643519fc9bd..25d3b11c97d4e84d94cf20015da4f96edd81c4ae 100644 (file)
@@ -2412,6 +2412,48 @@ var instructions = [ALAST & obj.AMask]instructionData{
        AVMVVX & obj.AMask: {enc: rVIVEncoding},
        AVMVVI & obj.AMask: {enc: rVViEncoding},
 
+       // 31.12.1: Vector Single-Width Saturating Add and Subtract
+       AVSADDUVV & obj.AMask: {enc: rVVVEncoding},
+       AVSADDUVX & obj.AMask: {enc: rVIVEncoding},
+       AVSADDUVI & obj.AMask: {enc: rVViEncoding},
+       AVSADDVV & obj.AMask:  {enc: rVVVEncoding},
+       AVSADDVX & obj.AMask:  {enc: rVIVEncoding},
+       AVSADDVI & obj.AMask:  {enc: rVViEncoding},
+       AVSSUBUVV & obj.AMask: {enc: rVVVEncoding},
+       AVSSUBUVX & obj.AMask: {enc: rVIVEncoding},
+       AVSSUBVV & obj.AMask:  {enc: rVVVEncoding},
+       AVSSUBVX & obj.AMask:  {enc: rVIVEncoding},
+
+       // 31.12.2: Vector Single-Width Averaging Add and Subtract
+       AVAADDUVV & obj.AMask: {enc: rVVVEncoding},
+       AVAADDUVX & obj.AMask: {enc: rVIVEncoding},
+       AVAADDVV & obj.AMask:  {enc: rVVVEncoding},
+       AVAADDVX & obj.AMask:  {enc: rVIVEncoding},
+       AVASUBUVV & obj.AMask: {enc: rVVVEncoding},
+       AVASUBUVX & obj.AMask: {enc: rVIVEncoding},
+       AVASUBVV & obj.AMask:  {enc: rVVVEncoding},
+       AVASUBVX & obj.AMask:  {enc: rVIVEncoding},
+
+       // 31.12.3: Vector Single-Width Fractional Multiply with Rounding and Saturation
+       AVSMULVV & obj.AMask: {enc: rVVVEncoding},
+       AVSMULVX & obj.AMask: {enc: rVIVEncoding},
+
+       // 31.12.4: Vector Single-Width Scaling Shift Instructions
+       AVSSRLVV & obj.AMask: {enc: rVVVEncoding},
+       AVSSRLVX & obj.AMask: {enc: rVIVEncoding},
+       AVSSRLVI & obj.AMask: {enc: rVVuEncoding},
+       AVSSRAVV & obj.AMask: {enc: rVVVEncoding},
+       AVSSRAVX & obj.AMask: {enc: rVIVEncoding},
+       AVSSRAVI & obj.AMask: {enc: rVVuEncoding},
+
+       // 31.12.5: Vector Narrowing Fixed-Point Clip Instructions
+       AVNCLIPUWV & obj.AMask: {enc: rVVVEncoding},
+       AVNCLIPUWX & obj.AMask: {enc: rVIVEncoding},
+       AVNCLIPUWI & obj.AMask: {enc: rVVuEncoding},
+       AVNCLIPWV & obj.AMask:  {enc: rVVVEncoding},
+       AVNCLIPWX & obj.AMask:  {enc: rVIVEncoding},
+       AVNCLIPWI & obj.AMask:  {enc: rVVuEncoding},
+
        //
        // Privileged ISA
        //
@@ -3393,10 +3435,13 @@ func instructionsForProg(p *obj.Prog) []*instruction {
                AVMINUVV, AVMINUVX, AVMINVV, AVMINVX, AVMAXUVV, AVMAXUVX, AVMAXVV, AVMAXVX,
                AVMULVV, AVMULVX, AVMULHVV, AVMULHVX, AVMULHUVV, AVMULHUVX, AVMULHSUVV, AVMULHSUVX,
                AVDIVUVV, AVDIVUVX, AVDIVVV, AVDIVVX, AVREMUVV, AVREMUVX, AVREMVV, AVREMVX,
-               AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX,
-               AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX,
+               AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX,
                AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX,
-               AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX:
+               AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX,
+               AVSADDUVV, AVSADDUVX, AVSADDUVI, AVSADDVV, AVSADDVX, AVSADDVI, AVSSUBUVV, AVSSUBUVX, AVSSUBVV, AVSSUBVX,
+               AVAADDUVV, AVAADDUVX, AVAADDVV, AVAADDVX, AVASUBUVV, AVASUBUVX, AVASUBVV, AVASUBVX,
+               AVSMULVV, AVSMULVX, AVSSRLVV, AVSSRLVX, AVSSRLVI, AVSSRAVV, AVSSRAVX, AVSSRAVI,
+               AVNCLIPUWV, AVNCLIPUWX, AVNCLIPUWI, AVNCLIPWV, AVNCLIPWX, AVNCLIPWI:
                // Set mask bit
                switch {
                case ins.rs3 == obj.REG_NONE: