Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile/internal: optimize multiplication use new operation 'ADDshiftLLV' on loong64
author: limeidan <limeidan@loongson.cn>
Thu, 7 Aug 2025 03:23:55 +0000 (11:23 +0800)
committer: abner chenc <chenguoqi@loongson.cn>
Wed, 13 Aug 2025 06:01:49 +0000 (23:01 -0700)
goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
                  │     old      │                 new                  │
                  │    sec/op    │    sec/op     vs base                │
MulconstI32/3       0.8004n ± 0%   0.4247n ± 2%  -46.94% (p=0.000 n=10)
MulconstI32/5       0.8005n ± 0%   0.4256n ± 1%  -46.83% (p=0.000 n=10)
MulconstI32/12      1.2010n ± 0%   0.8005n ± 0%  -33.35% (p=0.000 n=10)
MulconstI32/120     0.8090n ± 0%   0.8067n ± 0%   -0.28% (p=0.007 n=10)
MulconstI32/-120    0.8109n ± 0%   0.8072n ± 0%   -0.47% (p=0.000 n=10)
MulconstI32/65537   0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
MulconstI32/65538   0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.265 n=10)
MulconstI64/3       0.8005n ± 0%   0.4241n ± 1%  -47.02% (p=0.000 n=10)
MulconstI64/5       0.8004n ± 0%   0.4249n ± 1%  -46.91% (p=0.000 n=10)
MulconstI64/12      1.2010n ± 0%   0.8004n ± 0%  -33.36% (p=0.000 n=10)
MulconstI64/120     0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.635 n=10)
MulconstI64/-120    0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.837 n=10)
MulconstI64/65537   0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.837 n=10)
MulconstI64/65538   0.8096n ± 0%   0.8004n ± 0%   -1.14% (p=0.000 n=10)
MulconstU32/3       0.8004n ± 0%   0.4263n ± 1%  -46.75% (p=0.000 n=10)
MulconstU32/5       0.8005n ± 0%   0.4262n ± 1%  -46.76% (p=0.000 n=10)
MulconstU32/12      1.2010n ± 0%   0.8005n ± 0%  -33.35% (p=0.000 n=10)
MulconstU32/120     0.8105n ± 0%   0.8096n ± 0%        ~ (p=0.183 n=10)
MulconstU32/65537   0.8004n ± 0%   0.8004n ± 0%        ~ (p=1.000 n=10)
MulconstU32/65538   0.8005n ± 0%   0.8005n ± 0%        ~ (p=1.000 n=10)
MulconstU64/3       0.8004n ± 0%   0.4265n ± 4%  -46.71% (p=0.000 n=10)
MulconstU64/5       0.8004n ± 0%   0.4256n ± 0%  -46.82% (p=0.000 n=10)
MulconstU64/12      1.2010n ± 0%   0.8004n ± 0%  -33.36% (p=0.000 n=10)
MulconstU64/120     0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.387 n=10)
MulconstU64/65537   0.8005n ± 0%   0.8005n ± 0%        ~ (p=0.265 n=10)
MulconstU64/65538   0.8080n ± 0%   0.8004n ± 0%   -0.93% (p=0.000 n=10)
geomean             0.8539n        0.6597n       -22.74%

Change-Id: Ie33e88985d7639f481bbba540bc917b9f185c357
Reviewed-on: https://go-review.googlesource.com/c/go/+/693855
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: sophie zhao <zhaoxiaolin@loongson.cn>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/loong64/ssa.go
src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
src/cmd/compile/internal/ssa/config.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/internal/obj/loong64/asm.go
test/codegen/arithmetic.go
test/codegen/multiply.go

index f8ecebb3509a773bc754fee97ed6f20a267725b8..c7fb903d5d646094351e982a4eb7fe2a775aaba0 100644 (file)
@@ -1065,6 +1065,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                        {Type: obj.TYPE_CONST, Offset: int64((v.AuxInt >> 0) & 0x1f)},
                })
 
+       case ssa.OpLOONG64ADDshiftLLV:
+               // ADDshiftLLV Rarg0, Rarg1, $shift
+               // ALSLV $shift, Rarg1, Rarg0, Rtmp
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = v.AuxInt
+               p.Reg = v.Args[1].Reg()
+               p.AddRestSourceReg(v.Args[0].Reg())
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+
        case ssa.OpClobber, ssa.OpClobberReg:
                // TODO: implement for clobberdead experiment. Nop is ok for now.
        default:
index 75429cbffd782e8c5b548f0779ed77ea61d28b15..d6818e8592ee166bf444394e7a6e887263251977 100644 (file)
@@ -577,6 +577,8 @@ func init() {
                //   is $hint and bit[41:5] is $n.
                {name: "PRELD", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELD", hasSideEffects: true},
                {name: "PRELDX", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELDX", hasSideEffects: true},
+
+               {name: "ADDshiftLLV", argLength: 2, aux: "Int64", reg: gp21, asm: "ALSLV"}, // arg0 + arg1<<auxInt, the value of auxInt should be in the range [1, 4].
        }
 
        blocks := []blockData{
index 50ec2ec177a88bc3a62cb4d032c98b8c7fe95ca0..f2097170f47e04d46c90b33f4e416cdda4f9fd62 100644 (file)
@@ -566,7 +566,7 @@ func (c *Config) buildRecipes(arch string) {
                }
        case "loong64":
                // - multiply is 4 cycles.
-               // - add/sub/shift are 1 cycle.
+               // - add/sub/shift/alsl are 1 cycle.
                // On loong64, using a multiply also needs to load the constant into a register.
                // TODO: figure out a happy medium.
                mulCost = 45
@@ -601,6 +601,15 @@ func (c *Config) buildRecipes(arch string) {
                                        return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
                                })
                }
+
+               // ADDshiftLLV
+               for i := 1; i < 5; i++ {
+                       c := 10
+                       r(1, 1<<i, c,
+                               func(m, x, y *Value) *Value {
+                                       return m.Block.NewValue2I(m.Pos, OpLOONG64ADDshiftLLV, m.Type, int64(i), x, y)
+                               })
+               }
        }
 
        c.mulRecipes = map[int64]mulRecipe{}
@@ -718,7 +727,7 @@ func (c *Config) buildRecipes(arch string) {
        // Currently:
        // len(c.mulRecipes) == 5984 on arm64
        //                       680 on amd64
-       //                      5984 on loong64
+       //                      9738 on loong64
        // This function takes ~2.5ms on arm64.
        //println(len(c.mulRecipes))
 }
index 06a05c6e3f8e9c972eadb2f3e199475e3c5e6ac0..e155eca5ff3b55dcd06d6a90d407ffa4baa615fd 100644 (file)
@@ -1970,6 +1970,7 @@ const (
        OpLOONG64LoweredPanicBoundsCC
        OpLOONG64PRELD
        OpLOONG64PRELDX
+       OpLOONG64ADDshiftLLV
 
        OpMIPSADD
        OpMIPSADDconst
@@ -26527,6 +26528,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "ADDshiftLLV",
+               auxType: auxInt64,
+               argLen:  2,
+               asm:     loong64.AALSLV,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+                               {1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+                       outputs: []outputInfo{
+                               {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
+                       },
+               },
+       },
 
        {
                name:        "ADD",
index 76ad8e877935e12f5000fb564407a8fb456b6fde..ffd1177350b119af23db790a26a20831961f6bf3 100644 (file)
@@ -2743,8 +2743,8 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
 
        case 64: // alsl rd, rj, rk, sa2
                sa := p.From.Offset - 1
-               if sa > 3 {
-                       c.ctxt.Diag("The shift amount is too large.")
+               if sa < 0 || sa > 3 {
+                       c.ctxt.Diag("%v: shift amount out of range[1, 4].\n", p)
                }
                r := p.GetFrom3().Reg
                o1 = OP_2IRRR(c.opirrr(p.As), uint32(sa), uint32(r), uint32(p.Reg), uint32(p.To.Reg))
index 39a7986c7bc246cf4b81a5167b86378077b5073d..67adb50fa59bbf5e69d00f71460a585c8cef996d 100644 (file)
@@ -257,7 +257,7 @@ func Mul_96(n int) int {
        // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
        // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
        // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
-       // loong64:"ADDVU","SLLV\t[$]5",-"MULV"
+       // loong64:"SLLV\t[$]5","ALSLV\t[$]1,"
        // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
        return n * 96
 }
@@ -317,7 +317,7 @@ func MergeMuls5(a, n int) int {
 // Multiplications folded negation
 
 func FoldNegMul(a int) int {
-       // loong64:"MULV","MOVV\t[$]-11",-"SUBVU\tR[0-9], R0,"
+       // loong64:"SUBVU","ALSLV\t[$]2","ALSLV\t[$]1"
        return (-a) * 11
 }
 
index bb22d1a2b19f3671589ef1e1a73c485feb9d1e9c..dc2910dab7b6fb5e7f27189b5156df87bd7c3a88 100644 (file)
@@ -24,7 +24,7 @@ func m2(x int64) int64 {
 func m3(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2"
        // arm64: "ADD\tR[0-9]+<<1,"
-       // loong64: "ADDVU","ADDVU"
+       // loong64: "ALSLV\t[$]1,"
        return x * 3
 }
 func m4(x int64) int64 {
@@ -36,19 +36,19 @@ func m4(x int64) int64 {
 func m5(x int64) int64 {
        // amd64: "LEAQ\t.*[*]4"
        // arm64: "ADD\tR[0-9]+<<2,"
-       // loong64: "SLLV\t[$]2,","ADDVU"
+       // loong64: "ALSLV\t[$]2,"
        return x * 5
 }
 func m6(x int64) int64 {
        // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]2"
        // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<1,"
-       // loong64: "ADDVU","ADDVU","ADDVU"
+       // loong64: "ADDVU", "ADDVU", "ADDVU"
        return x * 6
 }
 func m7(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2"
        // arm64: "LSL\t[$]3,", "SUB\tR[0-9]+,"
-       // loong64: "SLLV\t[$]3,","SUBVU"
+       // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]1,"
        return x * 7
 }
 func m8(x int64) int64 {
@@ -60,43 +60,43 @@ func m8(x int64) int64 {
 func m9(x int64) int64 {
        // amd64: "LEAQ\t.*[*]8"
        // arm64: "ADD\tR[0-9]+<<3,"
-       // loong64: "SLLV\t[$]3,","ADDVU"
+       // loong64: "ALSLV\t[$]3,"
        return x * 9
 }
 func m10(x int64) int64 {
        // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]4"
        // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<2,"
-       // loong64: "ADDVU","SLLV\t[$]3,","ADDVU"
+       // loong64: "ADDVU", "ALSLV\t[$]2,"
        return x * 10
 }
 func m11(x int64) int64 {
        // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]2"
        // arm64: "MOVD\t[$]11,", "MUL"
-       // loong64: "MOVV\t[$]11,", "MULV"
+       // loong64: "ALSLV\t[$]2,", "ALSLV\t[$]1,"
        return x * 11
 }
 func m12(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]2,"
        // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<1,"
-       // loong64: "ADDVU","ADDVU","SLLV\t[$]2,"
+       // loong64: "SLLV", "ALSLV\t[$]1,"
        return x * 12
 }
 func m13(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4"
        // arm64: "MOVD\t[$]13,", "MUL"
-       // loong64: "MOVV\t[$]13,","MULV"
+       // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]2,"
        return x * 13
 }
 func m14(x int64) int64 {
        // amd64: "IMUL3Q\t[$]14,"
        // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+<<1,"
-       // loong64: "ADDVU","SLLV\t[$]4,","SUBVU"
+       // loong64: "ADDVU", "ALSLV\t[$]1", "ALSLV\t[$]2"
        return x * 14
 }
 func m15(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4"
        // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+,"
-       // loong64: "SLLV\t[$]4,","SUBVU"
+       // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]2,"
        return x * 15
 }
 func m16(x int64) int64 {
@@ -108,79 +108,79 @@ func m16(x int64) int64 {
 func m17(x int64) int64 {
        // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8"
        // arm64: "ADD\tR[0-9]+<<4,"
-       // loong64: "SLLV\t[$]4,","ADDVU"
+       // loong64: "ALSLV\t[$]"
        return x * 17
 }
 func m18(x int64) int64 {
        // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8"
        // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<3,"
-       // loong64: "ADDVU","SLLV\t[$]4,","ADDVU"
+       // loong64: "ADDVU", "ALSLV\t[$]3,"
        return x * 18
 }
 func m19(x int64) int64 {
        // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]2"
        // arm64: "MOVD\t[$]19,", "MUL"
-       // loong64: "MOVV\t[$]19,","MULV"
+       // loong64: "ALSLV\t[$]3,", "ALSLV\t[$]1,"
        return x * 19
 }
 func m20(x int64) int64 {
        // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]2,"
        // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<2,"
-       // loong64: "SLLV\t[$]2,","SLLV\t[$]4,","ADDVU"
+       // loong64: "SLLV\t[$]2,", "ALSLV\t[$]2," 
        return x * 20
 }
 func m21(x int64) int64 {
        // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4"
        // arm64: "MOVD\t[$]21,", "MUL"
-       // loong64: "MOVV\t[$]21,","MULV"
+       // loong64: "ALSLV\t[$]2,", "ALSLV\t[$]2,"
        return x * 21
 }
 func m22(x int64) int64 {
        // amd64: "IMUL3Q\t[$]22,"
        // arm64: "MOVD\t[$]22,", "MUL"
-       // loong64: "MOVV\t[$]22,","MULV"
+       // loong64: "ADDVU", "ALSLV\t[$]2,", "ALSLV\t[$]2,"
        return x * 22
 }
 func m23(x int64) int64 {
        // amd64: "IMUL3Q\t[$]23,"
        // arm64: "MOVD\t[$]23,", "MUL"
-       // loong64: "MOVV\t[$]23,","MULV"
+       // loong64: "ALSLV\t[$]1,", "SUBVU", "ALSLV\t[$]3,"
        return x * 23
 }
 func m24(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]3,"
        // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<1,"
-       // loong64: "ADDVU","ADDVU","SLLV\t[$]3,"
+       // loong64: "SLLV\t[$]3", "ALSLV\t[$]1,"
        return x * 24
 }
 func m25(x int64) int64 {
        // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4"
        // arm64: "MOVD\t[$]25,", "MUL"
-       // loong64: "MOVV\t[$]25,","MULV"
+       // loong64: "ALSLV\t[$]2,", "ALSLV\t[$]2,"
        return x * 25
 }
 func m26(x int64) int64 {
        // amd64: "IMUL3Q\t[$]26,"
        // arm64: "MOVD\t[$]26,", "MUL"
-       // loong64: "MOVV\t[$]26,","MULV"
+       // loong64: "ADDVU", "ALSLV\t[$]1,", "ALSLV\t[$]3,"
        return x * 26
 }
 func m27(x int64) int64 {
        // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]8"
        // arm64: "MOVD\t[$]27,", "MUL"
-       // loong64: "MOVV\t[$]27,","MULV"
+       // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]3,"
        return x * 27
 }
 func m28(x int64) int64 {
        // amd64: "IMUL3Q\t[$]28,"
        // arm64: "LSL\t[$]5, "SUB\tR[0-9]+<<2,"
-       // loong64: "SLLV\t[$]5,","SLLV\t[$]2,","SUBVU"
+       // loong64: "ALSLV\t[$]1,","SLLV\t[$]2,","ALSLV\t[$]3,"
        return x * 28
 }
 func m29(x int64) int64 {
        // amd64: "IMUL3Q\t[$]29,"
        // arm64: "MOVD\t[$]29,", "MUL"
-       // loong64: "MOVV\t[$]29,","MULV"
+       // loong64: "ALSLV\t[$]1,","SLLV\t[$]5,","SUBVU"
        return x * 29
 }
 func m30(x int64) int64 {
@@ -204,49 +204,49 @@ func m32(x int64) int64 {
 func m33(x int64) int64 {
        // amd64: "SHLQ\t[$]2,", "LEAQ\t.*[*]8"
        // arm64: "ADD\tR[0-9]+<<5,"
-       // loong64: "SLLV\t[$]5,","ADDVU"
+       // loong64: "ADDVU", "ALSLV\t[$]4,"
        return x * 33
 }
 func m34(x int64) int64 {
        // amd64: "SHLQ\t[$]5,", "LEAQ\t.*[*]2"
        // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<4,"
-       // loong64: "ADDVU","SLLV\t[$]5,","ADDVU"
+       // loong64: "ADDVU", "ALSLV\t[$]4,"
        return x * 34
 }
 func m35(x int64) int64 {
        // amd64: "IMUL3Q\t[$]35,"
        // arm64: "MOVD\t[$]35,", "MUL"
-       // loong64: "MOVV\t[$]35,","MULV"
+       // loong64: "ALSLV\t[$]4,", "ALSLV\t[$]1,"
        return x * 35
 }
 func m36(x int64) int64 {
        // amd64: "LEAQ\t.*[*]8", "SHLQ\t[$]2,"
        // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<3,"
-       // loong64: "SLLV\t[$]2,","SLLV\t[$]5,","ADDVU"
+       // loong64: "SLLV\t[$]2,", "ALSLV\t[$]3,"
        return x * 36
 }
 func m37(x int64) int64 {
        // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]4"
        // arm64: "MOVD\t[$]37,", "MUL"
-       // loong64: "MOVV\t[$]37,","MULV"
+       // loong64: "ALSLV\t[$]3,", "ALSLV\t[$]2,"
        return x * 37
 }
 func m38(x int64) int64 {
        // amd64: "IMUL3Q\t[$]38,"
        // arm64: "MOVD\t[$]38,", "MUL"
-       // loong64: "MOVV\t[$]38,","MULV"
+       // loong64: "ALSLV\t[$]3,", "ALSLV\t[$]2,"
        return x * 38
 }
 func m39(x int64) int64 {
        // amd64: "IMUL3Q\t[$]39,"
        // arm64: "MOVD\t[$]39,", "MUL"
-       // loong64: "MOVV\t[$]39,", "MULV"
+       // loong64: "ALSLV\t[$]2,", "SUBVU", "ALSLV\t[$]3,"
        return x * 39
 }
 func m40(x int64) int64 {
        // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]3,"
        // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<2,"
-       // loong64: "SLLV\t[$]3,","SLLV\t[$]5,","ADDVU"
+       // loong64: "SLLV\t[$]3,", "ALSLV\t[$]2,"
        return x * 40
 }
 
@@ -265,7 +265,7 @@ func mn2(x int64) int64 {
 func mn3(x int64) int64 {
        // amd64: "NEGQ", "LEAQ\t.*[*]2"
        // arm64: "SUB\tR[0-9]+<<2,"
-       // loong64: "SLLV\t[$]2,","SUBVU"
+       // loong64: "SUBVU", "ALSLV\t[$]1,"
        return x * -3
 }
 func mn4(x int64) int64 {
@@ -277,19 +277,19 @@ func mn4(x int64) int64 {
 func mn5(x int64) int64 {
        // amd64: "NEGQ", "LEAQ\t.*[*]4"
        // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<2,"
-       // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]2,","SUBVU"
+       // loong64: "SUBVU", "ALSLV\t[$]2,"
        return x * -5
 }
 func mn6(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-6,"
        // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<2,"
-       // loong64: "ADDVU","SLLV\t[$]3,","SUBVU"
+       // loong64: "ADDVU", "SUBVU", "ALSLV\t[$]3,"
        return x * -6
 }
 func mn7(x int64) int64 {
        // amd64: "NEGQ", "LEAQ\t.*[*]8"
        // arm64: "SUB\tR[0-9]+<<3,"
-       // loong64: "SLLV\t[$]3","SUBVU"
+       // loong64: "SUBVU", "ALSLV\t[$]3,"
        return x * -7
 }
 func mn8(x int64) int64 {
@@ -301,43 +301,43 @@ func mn8(x int64) int64 {
 func mn9(x int64) int64 {
        // amd64: "NEGQ", "LEAQ\t.*[*]8"
        // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<3,"
-       // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]3","SUBVU"
+       // loong64: "SUBVU", "ALSLV\t[$]3,"
        return x * -9
 }
 func mn10(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-10,"
        // arm64: "MOVD\t[$]-10,", "MUL"
-       // loong64: "MOVV\t[$]-10","MULV"
+       // loong64: "ADDVU", "ALSLV\t[$]3", "SUBVU"
        return x * -10
 }
 func mn11(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-11,"
        // arm64: "MOVD\t[$]-11,", "MUL"
-       // loong64: "MOVV\t[$]-11","MULV"
+       // loong64: "ALSLV\t[$]2,", "SUBVU", "ALSLV\t[$]4,"
        return x * -11
 }
 func mn12(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-12,"
        // arm64: "LSL\t[$]2,", "SUB\tR[0-9]+<<2,"
-       // loong64: "SLLV\t[$]2,","SLLV\t[$]4,","SUBVU"
+       // loong64: "SUBVU", "SLLV\t[$]2,", "ALSLV\t[$]4,"
        return x * -12
 }
 func mn13(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-13,"
        // arm64: "MOVD\t[$]-13,", "MUL"
-       // loong64: "MOVV\t[$]-13","MULV"
+       // loong64: "ALSLV\t[$]4,", "SLLV\t[$]2, ", "SUBVU"
        return x * -13
 }
 func mn14(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-14,"
        // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<3,"
-       // loong64: "ADDVU","SLLV\t[$]4,","SUBVU"
+       // loong64: "ADDVU", "SUBVU", "ALSLV\t[$]4,"
        return x * -14
 }
 func mn15(x int64) int64 {
        // amd64: "SHLQ\t[$]4,", "SUBQ"
        // arm64: "SUB\tR[0-9]+<<4,"
-       // loong64: "SLLV\t[$]4,","SUBVU"
+       // loong64: "SUBVU", "ALSLV\t[$]4,"
        return x * -15
 }
 func mn16(x int64) int64 {
@@ -349,24 +349,24 @@ func mn16(x int64) int64 {
 func mn17(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-17,"
        // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<4,"
-       // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]4,","SUBVU"
+       // loong64: "SUBVU", "ALSLV\t[$]4,"
        return x * -17
 }
 func mn18(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-18,"
        // arm64: "MOVD\t[$]-18,", "MUL"
-       // loong64: "MOVV\t[$]-18","MULV"
+       // loong64: "ADDVU", "ALSLV\t[$]4,", "SUBVU"
        return x * -18
 }
 func mn19(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-19,"
        // arm64: "MOVD\t[$]-19,", "MUL"
-       // loong64: "MOVV\t[$]-19","MULV"
+       // loong64: "ALSLV\t[$]1,", "ALSLV\t[$]4,", "SUBVU"
        return x * -19
 }
 func mn20(x int64) int64 {
        // amd64: "IMUL3Q\t[$]-20,"
        // arm64: "MOVD\t[$]-20,", "MUL"
-       // loong64: "MOVV\t[$]-20","MULV"
+       // loong64: "SLLV\t[$]2,", "ALSLV\t[$]4,", "SUBVU"
        return x * -20
 }