cmd/compile: optimize multiplication rules on loong64

author Xiaolin Zhao <zhaoxiaolin@loongson.cn>

Thu, 22 May 2025 08:21:10 +0000 (16:21 +0800)

committer Gopher Robot <gobot@golang.org>

Fri, 1 Aug 2025 15:42:40 +0000 (08:42 -0700)
author Xiaolin Zhao <zhaoxiaolin@loongson.cn>
Thu, 22 May 2025 08:21:10 +0000 (16:21 +0800)
committer Gopher Robot <gobot@golang.org>
Fri, 1 Aug 2025 15:42:40 +0000 (08:42 -0700)
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules

index 3232af1e55cc62bc85331748a61e7c685dd2ec75..9d0ad0148fd0107b48c90376405b5f5ae6348a5a 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules
@@ -750,10 +750,10 @@
  (SRLVconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVVconst [0])
  
  // mul by constant
-(MULV x (MOVVconst [-1])) => (NEGV x)
  (MULV _ (MOVVconst [0])) => (MOVVconst [0])
  (MULV x (MOVVconst [1])) => x
-(MULV x (MOVVconst [c])) && isPowerOfTwo(c) => (SLLVconst [log64(c)] x)
+
+(MULV  x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
  
  // div by constant
  (DIVVU x (MOVVconst [1])) => x
diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules

new file mode 100644 (file)

index 0000000..9584438
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules
@@ -0,0 +1,6 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Prefer addition when shifting left by one.
+(SLLVconst [1] x) => (ADDV x x)
diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go

index d4cd32a0d7b5b62f611642fcdce1805c3eec498d..50ec2ec177a88bc3a62cb4d032c98b8c7fe95ca0 100644 (file)
--- a/src/cmd/compile/internal/ssa/config.go
+++ b/src/cmd/compile/internal/ssa/config.go
@@ -283,6 +283,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
                 c.RegSize = 8
                 c.lowerBlock = rewriteBlockLOONG64
                 c.lowerValue = rewriteValueLOONG64
+               c.lateLowerBlock = rewriteBlockLOONG64latelower
+               c.lateLowerValue = rewriteValueLOONG64latelower
                 c.registers = registersLOONG64[:]
                 c.gpRegMask = gpRegMaskLOONG64
                 c.fpRegMask = fpRegMaskLOONG64
@@ -562,6 +564,43 @@ func (c *Config) buildRecipes(arch string) {
                                         return m.Block.NewValue2I(m.Pos, OpARM64SUBshiftLL, m.Type, int64(i), x, y)
                                 })
                 }
+       case "loong64":
+               // - multiply is 4 cycles.
+               // - add/sub/shift are 1 cycle.
+               // On loong64, using a multiply also needs to load the constant into a register.
+               // TODO: figure out a happy medium.
+               mulCost = 45
+
+               // add
+               r(1, 1, 10,
+                       func(m, x, y *Value) *Value {
+                               return m.Block.NewValue2(m.Pos, OpLOONG64ADDV, m.Type, x, y)
+                       })
+               // neg
+               r(-1, 0, 10,
+                       func(m, x, y *Value) *Value {
+                               return m.Block.NewValue1(m.Pos, OpLOONG64NEGV, m.Type, x)
+                       })
+               // sub
+               r(1, -1, 10,
+                       func(m, x, y *Value) *Value {
+                               return m.Block.NewValue2(m.Pos, OpLOONG64SUBV, m.Type, x, y)
+                       })
+
+               // regular shifts
+               for i := 1; i < 64; i++ {
+                       c := 10
+                       if i == 1 {
+                               // Prefer x<<1 over x+x.
+                               // Note that we eventually reverse this decision in LOONG64latelower.rules,
+                               // but this makes shift combining rules in LOONG64.rules simpler.
+                               c--
+                       }
+                       r(1<<i, 0, c,
+                               func(m, x, y *Value) *Value {
+                                       return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
+                               })
+               }
         }
  
         c.mulRecipes = map[int64]mulRecipe{}
@@ -628,17 +667,58 @@ func (c *Config) buildRecipes(arch string) {
                 }
         }
  
+       // Currently we only generate three-instruction linear combination recipes for loong64.
+       if arch == "loong64" {
+               // Three-instruction recipes.
+               // D: The first and the second are both single-instruction recipes, and they are also the third's inputs.
+               // E: The first single-instruction is the second's input, and the second is the third's input.
+
+               // D
+               for _, first := range linearCombos {
+                       for _, second := range linearCombos {
+                               for _, third := range linearCombos {
+                                       x := third.a*(first.a+first.b) + third.b*(second.a+second.b)
+                                       cost := first.cost + second.cost + third.cost
+                                       old := c.mulRecipes[x]
+                                       if (old.build == nil || cost < old.cost) && cost < mulCost {
+                                               c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
+                                                       v1 := first.build(m, v, v)
+                                                       v2 := second.build(m, v, v)
+                                                       return third.build(m, v1, v2)
+                                               }}
+                                       }
+                               }
+                       }
+               }
+
+               // E
+               for _, first := range linearCombos {
+                       for _, second := range linearCombos {
+                               for _, third := range linearCombos {
+                                       x := third.a*(second.a*(first.a+first.b)+second.b) + third.b
+                                       cost := first.cost + second.cost + third.cost
+                                       old := c.mulRecipes[x]
+                                       if (old.build == nil || cost < old.cost) && cost < mulCost {
+                                               c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
+                                                       v1 := first.build(m, v, v)
+                                                       v2 := second.build(m, v1, v)
+                                                       return third.build(m, v2, v)
+                                               }}
+                                       }
+                               }
+                       }
+               }
+       }
+
         // These cases should be handled specially by rewrite rules.
         // (Otherwise v * 1 == (neg (neg v)))
         delete(c.mulRecipes, 0)
         delete(c.mulRecipes, 1)
  
-       // Currently we assume that it doesn't help to do 3 linear
-       // combination instructions.
-
         // Currently:
         // len(c.mulRecipes) == 5984 on arm64
         //                       680 on amd64
+       //                      5984 on loong64
         // This function takes ~2.5ms on arm64.
         //println(len(c.mulRecipes))
  }
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go

index 568f906a07d637d2e4530c3bbcb91ccf4a53a063..83242413f06ab7aa2a90a8d941c2206548fcec10 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go
@@ -5537,20 +5537,8 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezeroidx(v *Value) bool {
  func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
         v_1 := v.Args[1]
         v_0 := v.Args[0]
-       // match: (MULV x (MOVVconst [-1]))
-       // result: (NEGV x)
-       for {
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != -1 {
-                               continue
-                       }
-                       v.reset(OpLOONG64NEGV)
-                       v.AddArg(x)
-                       return true
-               }
-               break
-       }
+       b := v.Block
+       config := b.Func.Config
         // match: (MULV _ (MOVVconst [0]))
         // result: (MOVVconst [0])
         for {
@@ -5578,8 +5566,8 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
                 break
         }
         // match: (MULV x (MOVVconst [c]))
-       // cond: isPowerOfTwo(c)
-       // result: (SLLVconst [log64(c)] x)
+       // cond: canMulStrengthReduce(config, c)
+       // result: {mulStrengthReduce(v, x, c)}
         for {
                 for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
                         x := v_0
@@ -5587,12 +5575,10 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
                                 continue
                         }
                         c := auxIntToInt64(v_1.AuxInt)
-                       if !(isPowerOfTwo(c)) {
+                       if !(canMulStrengthReduce(config, c)) {
                                 continue
                         }
-                       v.reset(OpLOONG64SLLVconst)
-                       v.AuxInt = int64ToAuxInt(log64(c))
-                       v.AddArg(x)
+                       v.copyOf(mulStrengthReduce(v, x, c))
                         return true
                 }
                 break
diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go b/src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go

new file mode 100644 (file)

index 0000000..ef9b831
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go
@@ -0,0 +1,29 @@
+// Code generated from _gen/LOONG64latelower.rules using 'go generate'; DO NOT EDIT.
+
+package ssa
+
+func rewriteValueLOONG64latelower(v *Value) bool {
+       switch v.Op {
+       case OpLOONG64SLLVconst:
+               return rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v)
+       }
+       return false
+}
+func rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (SLLVconst [1] x)
+       // result: (ADDV x x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 1 {
+                       break
+               }
+               x := v_0
+               v.reset(OpLOONG64ADDV)
+               v.AddArg2(x, x)
+               return true
+       }
+       return false
+}
+func rewriteBlockLOONG64latelower(b *Block) bool {
+       return false
+}
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go

index bd5540ec4bef742eb329827e0b69b1b73c1864eb..9f400065bdb6965b389c6f2cbeca8c014e2a8313 100644 (file)
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -228,6 +228,7 @@ func Pow2Muls(n1, n2 int) (int, int) {
         // 386:"SHLL\t[$]5",-"IMULL"
         // arm:"SLL\t[$]5",-"MUL"
         // arm64:"LSL\t[$]5",-"MUL"
+       // loong64:"SLLV\t[$]5",-"MULV"
         // ppc64x:"SLD\t[$]5",-"MUL"
         a := n1 * 32
  
@@ -235,6 +236,7 @@ func Pow2Muls(n1, n2 int) (int, int) {
         // 386:"SHLL\t[$]6",-"IMULL"
         // arm:"SLL\t[$]6",-"MUL"
         // arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
+       // loong64:"SLLV\t[$]6",-"MULV"
         // ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
         b := -64 * n2
  
@@ -255,11 +257,13 @@ func Mul_96(n int) int {
         // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
         // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
         // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
+       // loong64:"ADDVU","SLLV\t[$]5",-"MULV"
         // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
         return n * 96
  }
  
  func Mul_n120(n int) int {
+       // loong64:"SLLV\t[$]3","SLLV\t[$]7","SUBVU",-"MULV"
         // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
         return n * -120
  }
diff --git a/test/codegen/multiply.go b/test/codegen/multiply.go

index e7c1ccea1a3251717ec096b1575ef6f238a77c73..bb22d1a2b19f3671589ef1e1a73c485feb9d1e9c 100644 (file)
--- a/test/codegen/multiply.go
+++ b/test/codegen/multiply.go
@@ -12,301 +12,361 @@ package codegen
  func m0(x int64) int64 {
         // amd64: "XORL"
         // arm64: "MOVD\tZR"
+       // loong64: "MOVV\t[$]0"
         return x * 0
  }
  func m2(x int64) int64 {
         // amd64: "ADDQ"
         // arm64: "ADD"
+       // loong64: "ADDVU"
         return x * 2
  }
  func m3(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2"
         // arm64: "ADD\tR[0-9]+<<1,"
+       // loong64: "ADDVU","ADDVU"
         return x * 3
  }
  func m4(x int64) int64 {
         // amd64: "SHLQ\t[$]2,"
         // arm64: "LSL\t[$]2,"
+       // loong64: "SLLV\t[$]2,"
         return x * 4
  }
  func m5(x int64) int64 {
         // amd64: "LEAQ\t.*[*]4"
         // arm64: "ADD\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]2,","ADDVU"
         return x * 5
  }
  func m6(x int64) int64 {
         // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]2"
         // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<1,"
+       // loong64: "ADDVU","ADDVU","ADDVU"
         return x * 6
  }
  func m7(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2"
         // arm64: "LSL\t[$]3,", "SUB\tR[0-9]+,"
+       // loong64: "SLLV\t[$]3,","SUBVU"
         return x * 7
  }
  func m8(x int64) int64 {
         // amd64: "SHLQ\t[$]3,"
         // arm64: "LSL\t[$]3,"
+       // loong64: "SLLV\t[$]3,"
         return x * 8
  }
  func m9(x int64) int64 {
         // amd64: "LEAQ\t.*[*]8"
         // arm64: "ADD\tR[0-9]+<<3,"
+       // loong64: "SLLV\t[$]3,","ADDVU"
         return x * 9
  }
  func m10(x int64) int64 {
         // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]4"
         // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<2,"
+       // loong64: "ADDVU","SLLV\t[$]3,","ADDVU"
         return x * 10
  }
  func m11(x int64) int64 {
         // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]2"
         // arm64: "MOVD\t[$]11,", "MUL"
+       // loong64: "MOVV\t[$]11,", "MULV"
         return x * 11
  }
  func m12(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]2,"
         // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<1,"
+       // loong64: "ADDVU","ADDVU","SLLV\t[$]2,"
         return x * 12
  }
  func m13(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4"
         // arm64: "MOVD\t[$]13,", "MUL"
+       // loong64: "MOVV\t[$]13,","MULV"
         return x * 13
  }
  func m14(x int64) int64 {
         // amd64: "IMUL3Q\t[$]14,"
         // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+<<1,"
+       // loong64: "ADDVU","SLLV\t[$]4,","SUBVU"
         return x * 14
  }
  func m15(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4"
         // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+,"
+       // loong64: "SLLV\t[$]4,","SUBVU"
         return x * 15
  }
  func m16(x int64) int64 {
         // amd64: "SHLQ\t[$]4,"
         // arm64: "LSL\t[$]4,"
+       // loong64: "SLLV\t[$]4,"
         return x * 16
  }
  func m17(x int64) int64 {
         // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8"
         // arm64: "ADD\tR[0-9]+<<4,"
+       // loong64: "SLLV\t[$]4,","ADDVU"
         return x * 17
  }
  func m18(x int64) int64 {
         // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8"
         // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<3,"
+       // loong64: "ADDVU","SLLV\t[$]4,","ADDVU"
         return x * 18
  }
  func m19(x int64) int64 {
         // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]2"
         // arm64: "MOVD\t[$]19,", "MUL"
+       // loong64: "MOVV\t[$]19,","MULV"
         return x * 19
  }
  func m20(x int64) int64 {
         // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]2,"
         // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]2,","SLLV\t[$]4,","ADDVU"
         return x * 20
  }
  func m21(x int64) int64 {
         // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4"
         // arm64: "MOVD\t[$]21,", "MUL"
+       // loong64: "MOVV\t[$]21,","MULV"
         return x * 21
  }
  func m22(x int64) int64 {
         // amd64: "IMUL3Q\t[$]22,"
         // arm64: "MOVD\t[$]22,", "MUL"
+       // loong64: "MOVV\t[$]22,","MULV"
         return x * 22
  }
  func m23(x int64) int64 {
         // amd64: "IMUL3Q\t[$]23,"
         // arm64: "MOVD\t[$]23,", "MUL"
+       // loong64: "MOVV\t[$]23,","MULV"
         return x * 23
  }
  func m24(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]3,"
         // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<1,"
+       // loong64: "ADDVU","ADDVU","SLLV\t[$]3,"
         return x * 24
  }
  func m25(x int64) int64 {
         // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4"
         // arm64: "MOVD\t[$]25,", "MUL"
+       // loong64: "MOVV\t[$]25,","MULV"
         return x * 25
  }
  func m26(x int64) int64 {
         // amd64: "IMUL3Q\t[$]26,"
         // arm64: "MOVD\t[$]26,", "MUL"
+       // loong64: "MOVV\t[$]26,","MULV"
         return x * 26
  }
  func m27(x int64) int64 {
         // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]8"
         // arm64: "MOVD\t[$]27,", "MUL"
+       // loong64: "MOVV\t[$]27,","MULV"
         return x * 27
  }
  func m28(x int64) int64 {
         // amd64: "IMUL3Q\t[$]28,"
         // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]5,","SLLV\t[$]2,","SUBVU"
         return x * 28
  }
  func m29(x int64) int64 {
         // amd64: "IMUL3Q\t[$]29,"
         // arm64: "MOVD\t[$]29,", "MUL"
+       // loong64: "MOVV\t[$]29,","MULV"
         return x * 29
  }
  func m30(x int64) int64 {
         // amd64: "IMUL3Q\t[$]30,"
         // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+<<1,"
+       // loong64: "ADDVU","SLLV\t[$]5,","SUBVU"
         return x * 30
  }
  func m31(x int64) int64 {
         // amd64: "SHLQ\t[$]5,", "SUBQ"
         // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+,"
+       // loong64: "SLLV\t[$]5,","SUBVU"
         return x * 31
  }
  func m32(x int64) int64 {
         // amd64: "SHLQ\t[$]5,"
         // arm64: "LSL\t[$]5,"
+       // loong64: "SLLV\t[$]5,"
         return x * 32
  }
  func m33(x int64) int64 {
         // amd64: "SHLQ\t[$]2,", "LEAQ\t.*[*]8"
         // arm64: "ADD\tR[0-9]+<<5,"
+       // loong64: "SLLV\t[$]5,","ADDVU"
         return x * 33
  }
  func m34(x int64) int64 {
         // amd64: "SHLQ\t[$]5,", "LEAQ\t.*[*]2"
         // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<4,"
+       // loong64: "ADDVU","SLLV\t[$]5,","ADDVU"
         return x * 34
  }
  func m35(x int64) int64 {
         // amd64: "IMUL3Q\t[$]35,"
         // arm64: "MOVD\t[$]35,", "MUL"
+       // loong64: "MOVV\t[$]35,","MULV"
         return x * 35
  }
  func m36(x int64) int64 {
         // amd64: "LEAQ\t.*[*]8", "SHLQ\t[$]2,"
         // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<3,"
+       // loong64: "SLLV\t[$]2,","SLLV\t[$]5,","ADDVU"
         return x * 36
  }
  func m37(x int64) int64 {
         // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]4"
         // arm64: "MOVD\t[$]37,", "MUL"
+       // loong64: "MOVV\t[$]37,","MULV"
         return x * 37
  }
  func m38(x int64) int64 {
         // amd64: "IMUL3Q\t[$]38,"
         // arm64: "MOVD\t[$]38,", "MUL"
+       // loong64: "MOVV\t[$]38,","MULV"
         return x * 38
  }
  func m39(x int64) int64 {
         // amd64: "IMUL3Q\t[$]39,"
         // arm64: "MOVD\t[$]39,", "MUL"
+       // loong64: "MOVV\t[$]39,", "MULV"
         return x * 39
  }
  func m40(x int64) int64 {
         // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]3,"
         // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]3,","SLLV\t[$]5,","ADDVU"
         return x * 40
  }
  
  func mn1(x int64) int64 {
         // amd64: "NEGQ\t"
         // arm64: "NEG\tR[0-9]+,"
+       // loong64: "SUBVU\tR[0-9], R0,"
         return x * -1
  }
  func mn2(x int64) int64 {
         // amd64: "NEGQ", "ADDQ"
         // arm64: "NEG\tR[0-9]+<<1,"
+       // loong64: "ADDVU","SUBVU\tR[0-9], R0,"
         return x * -2
  }
  func mn3(x int64) int64 {
         // amd64: "NEGQ", "LEAQ\t.*[*]2"
         // arm64: "SUB\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]2,","SUBVU"
         return x * -3
  }
  func mn4(x int64) int64 {
         // amd64: "NEGQ", "SHLQ\t[$]2,"
         // arm64: "NEG\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]2,","SUBVU\tR[0-9], R0,"
         return x * -4
  }
  func mn5(x int64) int64 {
         // amd64: "NEGQ", "LEAQ\t.*[*]4"
         // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<2,"
+       // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]2,","SUBVU"
         return x * -5
  }
  func mn6(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-6,"
         // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<2,"
+       // loong64: "ADDVU","SLLV\t[$]3,","SUBVU"
         return x * -6
  }
  func mn7(x int64) int64 {
         // amd64: "NEGQ", "LEAQ\t.*[*]8"
         // arm64: "SUB\tR[0-9]+<<3,"
+       // loong64: "SLLV\t[$]3","SUBVU"
         return x * -7
  }
  func mn8(x int64) int64 {
         // amd64: "NEGQ", "SHLQ\t[$]3,"
         // arm64: "NEG\tR[0-9]+<<3,"
+       // loong64: "SLLV\t[$]3","SUBVU\tR[0-9], R0,"
         return x * -8
  }
  func mn9(x int64) int64 {
         // amd64: "NEGQ", "LEAQ\t.*[*]8"
         // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<3,"
+       // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]3","SUBVU"
         return x * -9
  }
  func mn10(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-10,"
         // arm64: "MOVD\t[$]-10,", "MUL"
+       // loong64: "MOVV\t[$]-10","MULV"
         return x * -10
  }
  func mn11(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-11,"
         // arm64: "MOVD\t[$]-11,", "MUL"
+       // loong64: "MOVV\t[$]-11","MULV"
         return x * -11
  }
  func mn12(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-12,"
         // arm64: "LSL\t[$]2,", "SUB\tR[0-9]+<<2,"
+       // loong64: "SLLV\t[$]2,","SLLV\t[$]4,","SUBVU"
         return x * -12
  }
  func mn13(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-13,"
         // arm64: "MOVD\t[$]-13,", "MUL"
+       // loong64: "MOVV\t[$]-13","MULV"
         return x * -13
  }
  func mn14(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-14,"
         // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<3,"
+       // loong64: "ADDVU","SLLV\t[$]4,","SUBVU"
         return x * -14
  }
  func mn15(x int64) int64 {
         // amd64: "SHLQ\t[$]4,", "SUBQ"
         // arm64: "SUB\tR[0-9]+<<4,"
+       // loong64: "SLLV\t[$]4,","SUBVU"
         return x * -15
  }
  func mn16(x int64) int64 {
         // amd64: "NEGQ", "SHLQ\t[$]4,"
         // arm64: "NEG\tR[0-9]+<<4,"
+       // loong64: "SLLV\t[$]4,","SUBVU\tR[0-9], R0,"
         return x * -16
  }
  func mn17(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-17,"
         // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<4,"
+       // loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]4,","SUBVU"
         return x * -17
  }
  func mn18(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-18,"
         // arm64: "MOVD\t[$]-18,", "MUL"
+       // loong64: "MOVV\t[$]-18","MULV"
         return x * -18
  }
  func mn19(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-19,"
         // arm64: "MOVD\t[$]-19,", "MUL"
+       // loong64: "MOVV\t[$]-19","MULV"
         return x * -19
  }
  func mn20(x int64) int64 {
         // amd64: "IMUL3Q\t[$]-20,"
         // arm64: "MOVD\t[$]-20,", "MUL"
+       // loong64: "MOVV\t[$]-20","MULV"
         return x * -20
  }
author	Xiaolin Zhao <zhaoxiaolin@loongson.cn>
	Thu, 22 May 2025 08:21:10 +0000 (16:21 +0800)
committer	Gopher Robot <gobot@golang.org>
	Fri, 1 Aug 2025 15:42:40 +0000 (08:42 -0700)
src/cmd/compile/internal/ssa/_gen/LOONG64.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/LOONG64latelower.rules	[new file with mode: 0644]	patch \| blob
src/cmd/compile/internal/ssa/config.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteLOONG64.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteLOONG64latelower.go	[new file with mode: 0644]	patch \| blob
test/codegen/arithmetic.go		patch \| blob \| history
test/codegen/multiply.go		patch \| blob \| history