From 3492e4262b751c5a117a3c77ba5e243b16918e61 Mon Sep 17 00:00:00 2001 From: Xiaolin Zhao Date: Wed, 3 Sep 2025 15:43:21 +0800 Subject: [PATCH] cmd/compile: simplify specific addition operations using the ADDV16 instruction On loong64, the addi.d instruction can only encode a 12-bit immediate directly. A larger immediate must first be loaded into a register, and the addition is then performed with the add.d instruction. If a larger immediate c is non-zero and satisfies is32Bit(c) && c&0xffff == 0, the ADDV16 instruction can perform the addition in a single instruction instead. Removes 164 instructions from the Go binary on loong64. Change-Id: I404de93cc4eaaa12fe424f5a0d61b03231215d1a Reviewed-on: https://go-review.googlesource.com/c/go/+/700536 Reviewed-by: Meidan Li Reviewed-by: Keith Randall Reviewed-by: Keith Randall Auto-Submit: Michael Pratt Reviewed-by: abner chenc LUCI-TryBot-Result: Go LUCI Reviewed-by: Michael Pratt --- src/cmd/compile/internal/loong64/ssa.go | 1 + src/cmd/compile/internal/ssa/_gen/LOONG64.rules | 1 + src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go | 9 +++++---- src/cmd/compile/internal/ssa/opGen.go | 15 +++++++++++++++ src/cmd/compile/internal/ssa/rewriteLOONG64.go | 14 ++++++++++++++ test/codegen/arithmetic.go | 5 +++++ 6 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index 134575c85c..40fa10c6de 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -276,6 +276,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpLOONG64ADDVconst, + ssa.OpLOONG64ADDV16const, ssa.OpLOONG64SUBVconst, ssa.OpLOONG64ANDconst, ssa.OpLOONG64ORconst, diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index 3fa4f363f6..501d374529 100644 --- 
a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -794,6 +794,7 @@ (SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) => (ADDVconst [-c-d] x) (SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) => (ADDVconst [-c+d] x) (SUBV (MOVVconst [c]) (NEGV (SUBVconst [d] x))) => (ADDVconst [c-d] x) +(ADDVconst [c] x) && is32Bit(c) && c&0xffff == 0 && c != 0 => (ADDV16const [c] x) (SLLVconst [c] (MOVVconst [d])) => (MOVVconst [d<<uint64(c)]) (SRLVconst [c] (MOVVconst [d])) => (MOVVconst [int64(uint64(d)>>uint64(c))]) (SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)]) diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index bee619f6d9..0d5e0eb76f 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -189,10 +189,11 @@ func init() { {name: "VPCNT16", argLength: 1, reg: fp11, asm: "VPCNTH"}, // count set bits for each 16-bit unit and store the result in each 16-bit unit // binary ops - {name: "ADDV", argLength: 2, reg: gp21, asm: "ADDVU", commutative: true}, // arg0 + arg1 - {name: "ADDVconst", argLength: 1, reg: gp11sp, asm: "ADDVU", aux: "Int64"}, // arg0 + auxInt. auxInt is 32-bit, also in other *const ops. - {name: "SUBV", argLength: 2, reg: gp21, asm: "SUBVU"}, // arg0 - arg1 - {name: "SUBVconst", argLength: 1, reg: gp11, asm: "SUBVU", aux: "Int64"}, // arg0 - auxInt + {name: "ADDV", argLength: 2, reg: gp21, asm: "ADDVU", commutative: true}, // arg0 + arg1 + {name: "ADDVconst", argLength: 1, reg: gp11sp, asm: "ADDVU", aux: "Int64"}, // arg0 + auxInt. auxInt is 32-bit, also in other *const ops. + {name: "ADDV16const", argLength: 1, reg: gp11sp, asm: "ADDV16", aux: "Int64"}, // arg0 + auxInt. auxInt is signed 32-bit and is a multiple of 65536, also in other *const ops. 
+ {name: "SUBV", argLength: 2, reg: gp21, asm: "SUBVU"}, // arg0 - arg1 + {name: "SUBVconst", argLength: 1, reg: gp11, asm: "SUBVU", aux: "Int64"}, // arg0 - auxInt {name: "MULV", argLength: 2, reg: gp21, asm: "MULV", commutative: true, typ: "Int64"}, // arg0 * arg1 {name: "MULHV", argLength: 2, reg: gp21, asm: "MULHV", commutative: true, typ: "Int64"}, // (arg0 * arg1) >> 64, signed diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 4aef2d2aa1..d9cccb27ba 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1790,6 +1790,7 @@ const ( OpLOONG64VPCNT16 OpLOONG64ADDV OpLOONG64ADDVconst + OpLOONG64ADDV16const OpLOONG64SUBV OpLOONG64SUBVconst OpLOONG64MULV @@ -24067,6 +24068,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADDV16const", + auxType: auxInt64, + argLen: 1, + asm: loong64.AADDV16, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741820}, // SP R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "SUBV", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index 5890fe050a..c49ce31ae4 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -2008,6 +2008,20 @@ func rewriteValueLOONG64_OpLOONG64ADDVconst(v *Value) bool { v.AddArg(x) return true } + // match: (ADDVconst [c] x) + // cond: is32Bit(c) && c&0xffff == 0 && c != 0 + // result: (ADDV16const [c] x) + for { + c := auxIntToInt64(v.AuxInt) + x := v_0 + if !(is32Bit(c) && c&0xffff == 0 && c != 0) { + break + } + v.reset(OpLOONG64ADDV16const) + v.AuxInt = int64ToAuxInt(c) + v.AddArg(x) + return true + } return false } func rewriteValueLOONG64_OpLOONG64AND(v *Value) 
bool { diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go index beabfe24eb..7055db3dc9 100644 --- a/test/codegen/arithmetic.go +++ b/test/codegen/arithmetic.go @@ -51,6 +51,11 @@ func AddLargeConst(a uint64, out []uint64) { out[9] = a - 32769 } +func AddLargeConst2(a int, out []int) { + // loong64: -"ADDVU","ADDV16" + out[0] = a + 0x10000 +} + // ----------------- // // Subtraction // // ----------------- // -- 2.52.0