]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/riscv: fix vector integer multiply add
authorMark Ryan <markdryan@rivosinc.com>
Tue, 6 May 2025 12:07:09 +0000 (14:07 +0200)
committerMark Ryan <markdryan@rivosinc.com>
Wed, 21 May 2025 14:19:19 +0000 (07:19 -0700)
The RISC-V integer vector multiply add instructions are not encoded
correctly; the first and second arguments are swapped. For example,
the instruction

VMACCVV V1, V2, V3

encodes to

b620a1d7 or vmacc.vv v3,v1,v2

and not

b61121d7 or vmacc.vv v3,v2,v1

as expected.

This is inconsistent with the argument ordering we use for 3
argument vector instructions, in which the argument order, as given
in the RISC-V specifications, is reversed, and also with the vector
FMA instructions which have the same argument ordering as the vector
integer multiply add instructions in the "The RISC-V Instruction Set
Manual Volume I". For example, in the ISA manual we have the
following instruction definitions

; Integer multiply-add, overwrite addend
vmacc.vv vd, vs1, vs2, vm    # vd[i] = +(vs1[i] * vs2[i]) + vd[i]

; FP multiply-accumulate, overwrites addend
vfmacc.vv vd, vs1, vs2, vm    # vd[i] = +(vs1[i] * vs2[i]) + vd[i]

It's reasonable to expect that the Go assembler would use the same
argument ordering for both of these instructions. It currently does
not.

We fix the issue by switching the argument ordering for the vector
integer multiply add instructions to match those of the vector FMA
instructions.

Change-Id: Ib98e9999617f991969e5c831734b3bb3324439f6
Reviewed-on: https://go-review.googlesource.com/c/go/+/670335
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meng Zhuo <mengzhuo1203@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/asm/internal/asm/testdata/riscv64.s
src/cmd/asm/internal/asm/testdata/riscv64validation.s
src/cmd/internal/obj/riscv/obj.go

index 4e0226a2b6f48673b3b7df9b908693ef44339b57..75abcefa10227ba2da615feaea3fb473bc112f25 100644 (file)
@@ -830,38 +830,38 @@ start:
        VWMULSUVX       X10, V2, V0, V3                 // d76125e8
 
        // 31.11.13: Vector Single-Width Integer Multiply-Add Instructions
-       VMACCVV         V1, V2, V3                      // d7a120b6
-       VMACCVV         V1, V2, V0, V3                  // d7a120b4
-       VMACCVX         X10, V2, V3                     // d76125b6
-       VMACCVX         X10, V2, V0, V3                 // d76125b4
-       VNMSACVV        V1, V2, V3                      // d7a120be
-       VNMSACVV        V1, V2, V0, V3                  // d7a120bc
-       VNMSACVX        X10, V2, V3                     // d76125be
-       VNMSACVX        X10, V2, V0, V3                 // d76125bc
-       VMADDVV         V1, V2, V3                      // d7a120a6
-       VMADDVV         V1, V2, V0, V3                  // d7a120a4
-       VMADDVX         X10, V2, V3                     // d76125a6
-       VMADDVX         X10, V2, V0, V3                 // d76125a4
-       VNMSUBVV        V1, V2, V3                      // d7a120ae
-       VNMSUBVV        V1, V2, V0, V3                  // d7a120ac
-       VNMSUBVX        X10, V2, V3                     // d76125ae
-       VNMSUBVX        X10, V2, V0, V3                 // d76125ac
+       VMACCVV         V2, V1, V3                      // d7a120b6
+       VMACCVV         V2, V1, V0, V3                  // d7a120b4
+       VMACCVX         V2, X10, V3                     // d76125b6
+       VMACCVX         V2, X10, V0, V3                 // d76125b4
+       VNMSACVV        V2, V1, V3                      // d7a120be
+       VNMSACVV        V2, V1, V0, V3                  // d7a120bc
+       VNMSACVX        V2, X10, V3                     // d76125be
+       VNMSACVX        V2, X10, V0, V3                 // d76125bc
+       VMADDVV         V2, V1, V3                      // d7a120a6
+       VMADDVV         V2, V1, V0, V3                  // d7a120a4
+       VMADDVX         V2, X10, V3                     // d76125a6
+       VMADDVX         V2, X10, V0, V3                 // d76125a4
+       VNMSUBVV        V2, V1, V3                      // d7a120ae
+       VNMSUBVV        V2, V1, V0, V3                  // d7a120ac
+       VNMSUBVX        V2, X10, V3                     // d76125ae
+       VNMSUBVX        V2, X10, V0, V3                 // d76125ac
 
        // 31.11.14: Vector Widening Integer Multiply-Add Instructions
-       VWMACCUVV       V1, V2, V3                      // d7a120f2
-       VWMACCUVV       V1, V2, V0, V3                  // d7a120f0
-       VWMACCUVX       X10, V2, V3                     // d76125f2
-       VWMACCUVX       X10, V2, V0, V3                 // d76125f0
-       VWMACCVV        V1, V2, V3                      // d7a120f6
-       VWMACCVV        V1, V2, V0, V3                  // d7a120f4
-       VWMACCVX        X10, V2, V3                     // d76125f6
-       VWMACCVX        X10, V2, V0, V3                 // d76125f4
-       VWMACCSUVV      V1, V2, V3                      // d7a120fe
-       VWMACCSUVV      V1, V2, V0, V3                  // d7a120fc
-       VWMACCSUVX      X10, V2, V3                     // d76125fe
-       VWMACCSUVX      X10, V2, V0, V3                 // d76125fc
-       VWMACCUSVX      X10, V2, V3                     // d76125fa
-       VWMACCUSVX      X10, V2, V0, V3                 // d76125f8
+       VWMACCUVV       V2, V1, V3                      // d7a120f2
+       VWMACCUVV       V2, V1, V0, V3                  // d7a120f0
+       VWMACCUVX       V2, X10, V3                     // d76125f2
+       VWMACCUVX       V2, X10, V0, V3                 // d76125f0
+       VWMACCVV        V2, V1, V3                      // d7a120f6
+       VWMACCVV        V2, V1, V0, V3                  // d7a120f4
+       VWMACCVX        V2, X10, V3                     // d76125f6
+       VWMACCVX        V2, X10, V0, V3                 // d76125f4
+       VWMACCSUVV      V2, V1, V3                      // d7a120fe
+       VWMACCSUVV      V2, V1, V0, V3                  // d7a120fc
+       VWMACCSUVX      V2, X10, V3                     // d76125fe
+       VWMACCSUVX      V2, X10, V0, V3                 // d76125fc
+       VWMACCUSVX      V2, X10, V3                     // d76125fa
+       VWMACCUSVX      V2, X10, V0, V3                 // d76125f8
 
        // 31.11.15: Vector Integer Merge Instructions
        VMERGEVVM       V1, V2, V0, V3                  // d781205c
index 374a97dcfe979230e96a5612f3fcbd6c56fc75a7..55bf518e68bba5921aaacbc017d55c529462b5fc 100644 (file)
@@ -214,19 +214,19 @@ TEXT validation(SB),$0
        VWMULUVX        V1, V2, V3                      // ERROR "expected integer register in rs1 position"
        VWMULSUVV       X10, V2, V3                     // ERROR "expected vector register in vs1 position"
        VWMULSUVX       V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VMACCVV         X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VMACCVV         V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VMACCVX         V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VNMSACVV        X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VNMSACVV        V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VNMSACVX        V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VMADDVV         X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VMADDVV         V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VMADDVX         V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VNMSUBVV        X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VNMSUBVV        V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VNMSUBVX        V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VWMACCUVV       X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VWMACCUVV       V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VWMACCUVX       V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VWMACCVV        X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VWMACCVV        V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VWMACCVX        V1, V2, V3                      // ERROR "expected integer register in rs1 position"
-       VWMACCSUVV      X10, V2, V3                     // ERROR "expected vector register in vs1 position"
+       VWMACCSUVV      V2, X10, V3                     // ERROR "expected vector register in vs1 position"
        VWMACCSUVX      V1, V2, V3                      // ERROR "expected integer register in rs1 position"
        VWMACCUSVX      V1, V2, V3                      // ERROR "expected integer register in rs1 position"
        VMERGEVVM       X10, V2, V0, V3                 // ERROR "expected vector register in vs1 position"
index 3c91a1f02c8ca4951f222cb9223b6b9c502be925..5b598b57574c4a977813c1894a8da220a047dfcf 100644 (file)
@@ -3697,8 +3697,6 @@ func instructionsForProg(p *obj.Prog) []*instruction {
                AVMULVV, AVMULVX, AVMULHVV, AVMULHVX, AVMULHUVV, AVMULHUVX, AVMULHSUVV, AVMULHSUVX,
                AVDIVUVV, AVDIVUVX, AVDIVVV, AVDIVVX, AVREMUVV, AVREMUVX, AVREMVV, AVREMVX,
                AVWMULVV, AVWMULVX, AVWMULUVV, AVWMULUVX, AVWMULSUVV, AVWMULSUVX, AVNSRLWV, AVNSRLWX, AVNSRAWV, AVNSRAWX,
-               AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX,
-               AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX,
                AVSADDUVV, AVSADDUVX, AVSADDUVI, AVSADDVV, AVSADDVX, AVSADDVI, AVSSUBUVV, AVSSUBUVX, AVSSUBVV, AVSSUBVX,
                AVAADDUVV, AVAADDUVX, AVAADDVV, AVAADDVX, AVASUBUVV, AVASUBUVX, AVASUBVV, AVASUBVX,
                AVSMULVV, AVSMULVX, AVSSRLVV, AVSSRLVX, AVSSRLVI, AVSSRAVV, AVSSRAVX, AVSSRAVI,
@@ -3724,7 +3722,9 @@ func instructionsForProg(p *obj.Prog) []*instruction {
 
        case AVFMACCVV, AVFMACCVF, AVFNMACCVV, AVFNMACCVF, AVFMSACVV, AVFMSACVF, AVFNMSACVV, AVFNMSACVF,
                AVFMADDVV, AVFMADDVF, AVFNMADDVV, AVFNMADDVF, AVFMSUBVV, AVFMSUBVF, AVFNMSUBVV, AVFNMSUBVF,
-               AVFWMACCVV, AVFWMACCVF, AVFWNMACCVV, AVFWNMACCVF, AVFWMSACVV, AVFWMSACVF, AVFWNMSACVV, AVFWNMSACVF:
+               AVFWMACCVV, AVFWMACCVF, AVFWNMACCVV, AVFWNMACCVF, AVFWMSACVV, AVFWMSACVF, AVFWNMSACVV, AVFWNMSACVF,
+               AVMACCVV, AVMACCVX, AVNMSACVV, AVNMSACVX, AVMADDVV, AVMADDVX, AVNMSUBVV, AVNMSUBVX,
+               AVWMACCUVV, AVWMACCUVX, AVWMACCVV, AVWMACCVX, AVWMACCSUVV, AVWMACCSUVX, AVWMACCUSVX:
                switch {
                case ins.rs3 == obj.REG_NONE:
                        ins.funct7 |= 1 // unmasked