From: Russ Cox Date: Mon, 27 Oct 2025 23:41:39 +0000 (-0400) Subject: cmd/compile: implement bits.Mul64 on 32-bit systems X-Git-Tag: go1.26rc1~423 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=1e5bb416d887b1cf2bd9b6a3b9f05222d44c3ffc;p=gostls13.git cmd/compile: implement bits.Mul64 on 32-bit systems This CL implements Mul64uhilo, Hmul64, Hmul64u, and Avg64u on 32-bit systems, with the effect that constant division of both int64s and uint64s can now be emitted directly in all cases, and also that bits.Mul64 can be intrinsified on 32-bit systems. Previously, constant division of uint64s by values 0 ≤ c ≤ 0xFFFF were implemented as uint32 divisions by c and some fixup. After expanding those smaller constant divisions, the code for i/999 required: (386) 7 mul, 10 add, 2 sub, 3 rotate, 3 shift (104 bytes) (arm) 7 mul, 9 add, 3 sub, 2 shift (104 bytes) (mips) 7 mul, 10 add, 5 sub, 6 shift, 3 sgtu (176 bytes) For that much code, we might as well use a full 64x64->128 multiply that can be used for all divisors, not just small ones. Having done that, the same i/999 now generates: (386) 4 mul, 9 add, 2 sub, 2 or, 6 shift (112 bytes) (arm) 4 mul, 8 add, 2 sub, 2 or, 3 shift (92 bytes) (mips) 4 mul, 11 add, 3 sub, 6 shift, 8 sgtu, 4 or (196 bytes) The size increase on 386 is due to a few extra register spills. The size increase on mips is due to add-with-carry being hard. The new approach is more general, letting us delete the old special case and guarantee that all int64 and uint64 divisions by constants are generated directly on 32-bit systems. This especially speeds up code making heavy use of bits.Mul64 with a constant argument, which happens in strconv and various crypto packages. A few examples are benchmarked below. 
pkg: cmd/compile/internal/test benchmark \ host local linux-amd64 s7 linux-386 s7:GOARCH=386 vs base vs base vs base vs base vs base DivconstI64 ~ ~ ~ -49.66% -21.02% ModconstI64 ~ ~ ~ -13.45% +14.52% DivisiblePow2constI64 ~ ~ ~ +0.97% -1.32% DivisibleconstI64 ~ ~ ~ -20.01% -48.28% DivisibleWDivconstI64 ~ ~ -1.76% -38.59% -42.74% DivconstU64/3 ~ ~ ~ -13.82% -4.09% DivconstU64/5 ~ ~ ~ -14.10% -3.54% DivconstU64/37 -2.07% -4.45% ~ -19.60% -9.55% DivconstU64/1234567 ~ ~ ~ -61.55% -56.93% ModconstU64 ~ ~ ~ -6.25% ~ DivisibleconstU64 ~ ~ ~ -2.78% -7.82% DivisibleWDivconstU64 ~ ~ ~ +4.23% +2.56% pkg: math/bits benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base Add ~ ~ ~ ~ Add32 +1.59% ~ ~ ~ Add64 ~ ~ ~ ~ Add64multiple ~ ~ ~ ~ Sub ~ ~ ~ ~ Sub32 ~ ~ ~ ~ Sub64 ~ ~ -9.20% ~ Sub64multiple ~ ~ ~ ~ Mul ~ ~ ~ ~ Mul32 ~ ~ ~ ~ Mul64 ~ ~ -41.58% -53.21% Div ~ ~ ~ ~ Div32 ~ ~ ~ ~ Div64 ~ ~ ~ ~ pkg: strconv benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base ParseInt/Pos/7bit ~ ~ -11.08% -6.75% ParseInt/Pos/26bit ~ ~ -13.65% -11.02% ParseInt/Pos/31bit ~ ~ -14.65% -9.71% ParseInt/Pos/56bit -1.80% ~ -17.97% -10.78% ParseInt/Pos/63bit ~ ~ -13.85% -9.63% ParseInt/Neg/7bit ~ ~ -12.14% -7.26% ParseInt/Neg/26bit ~ ~ -14.18% -9.81% ParseInt/Neg/31bit ~ ~ -14.51% -9.02% ParseInt/Neg/56bit ~ ~ -15.79% -9.79% ParseInt/Neg/63bit ~ ~ -15.68% -11.07% AppendFloat/Decimal ~ ~ -7.25% -12.26% AppendFloat/Float ~ ~ -15.96% -19.45% AppendFloat/Exp ~ ~ -13.96% -17.76% AppendFloat/NegExp ~ ~ -14.89% -20.27% AppendFloat/LongExp ~ ~ -12.68% -17.97% AppendFloat/Big ~ ~ -11.10% -16.64% AppendFloat/BinaryExp ~ ~ ~ ~ AppendFloat/32Integer ~ ~ -10.05% -10.91% AppendFloat/32ExactFraction ~ ~ -8.93% -13.00% AppendFloat/32Point ~ ~ -10.36% -14.89% AppendFloat/32Exp ~ ~ -9.88% -13.54% AppendFloat/32NegExp ~ ~ -10.16% -14.26% AppendFloat/32Shortest ~ ~ -11.39% -14.96% AppendFloat/32Fixed8Hard ~ ~ ~ -2.31% AppendFloat/32Fixed9Hard ~ ~ ~ -7.01% 
AppendFloat/64Fixed1 ~ ~ -2.83% -8.23% AppendFloat/64Fixed2 ~ ~ ~ -7.94% AppendFloat/64Fixed3 ~ ~ -4.07% -7.22% AppendFloat/64Fixed4 ~ ~ -7.24% -7.62% AppendFloat/64Fixed12 ~ ~ -6.57% -4.82% AppendFloat/64Fixed16 ~ ~ -4.00% -5.81% AppendFloat/64Fixed12Hard -2.22% ~ -4.07% -6.35% AppendFloat/64Fixed17Hard -2.12% ~ ~ -3.79% AppendFloat/64Fixed18Hard -1.89% ~ +2.48% ~ AppendFloat/Slowpath64 -1.85% ~ -14.49% -18.21% AppendFloat/SlowpathDenormal64 ~ ~ -13.08% -19.41% pkg: crypto/internal/fips140/nistec/fiat benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base Mul/P224 ~ ~ -29.95% -39.60% Mul/P384 ~ ~ -37.11% -63.33% Mul/P521 ~ ~ -26.62% -12.42% Square/P224 +1.46% ~ -40.62% -49.18% Square/P384 ~ ~ -45.51% -69.68% Square/P521 +90.37% ~ -25.26% -11.23% (The +90% is a separate problem and not real; that much variation can be seen on that system by running the same binary from two different files.) pkg: crypto/internal/fips140/edwards25519 benchmark \ host s7 linux-amd64 linux-386 s7:GOARCH=386 vs base vs base vs base vs base EncodingDecoding ~ ~ -34.67% -35.75% ScalarBaseMult ~ ~ -31.25% -30.29% ScalarMult ~ ~ -33.45% -32.54% VarTimeDoubleScalarBaseMult ~ ~ -33.78% -33.68% Change-Id: Id3c91d42cd01def6731b755e99f8f40c6ad1bb65 Reviewed-on: https://go-review.googlesource.com/c/go/+/716061 LUCI-TryBot-Result: Go LUCI Auto-Submit: Russ Cox Reviewed-by: Keith Randall Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/arm/ssa.go b/src/cmd/compile/internal/arm/ssa.go index a3bfb491b8..b31ffa474b 100644 --- a/src/cmd/compile/internal/arm/ssa.go +++ b/src/cmd/compile/internal/arm/ssa.go @@ -245,6 +245,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Type = obj.TYPE_REG p.To.Reg = r case ssa.OpARMADDS, + ssa.OpARMADCS, ssa.OpARMSUBS: r := v.Reg0() r1 := v.Args[0].Reg() diff --git a/src/cmd/compile/internal/ssa/_gen/386.rules b/src/cmd/compile/internal/ssa/_gen/386.rules index 5f11502419..cbe56f7579 100644 --- 
a/src/cmd/compile/internal/ssa/_gen/386.rules +++ b/src/cmd/compile/internal/ssa/_gen/386.rules @@ -7,6 +7,7 @@ (Add(32|64)F ...) => (ADDS(S|D) ...) (Add32carry ...) => (ADDLcarry ...) (Add32withcarry ...) => (ADCL ...) +(Add32carrywithcarry ...) => (ADCLcarry ...) (Sub(Ptr|32|16|8) ...) => (SUBL ...) (Sub(32|64)F ...) => (SUBS(S|D) ...) diff --git a/src/cmd/compile/internal/ssa/_gen/386Ops.go b/src/cmd/compile/internal/ssa/_gen/386Ops.go index 60599a33ab..09bfc4226f 100644 --- a/src/cmd/compile/internal/ssa/_gen/386Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/386Ops.go @@ -90,22 +90,23 @@ func init() { // Common regInfo var ( - gp01 = regInfo{inputs: nil, outputs: gponly} - gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} - gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} - gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} - gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} - gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} - gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} - gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly} - gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} - gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} - gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} - gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} - gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx} - gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax} - gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax} - gp21mul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}} + gp01 = regInfo{inputs: nil, outputs: gponly} + gp11 = regInfo{inputs: []regMask{gp}, outputs: gponly} + gp11sp = regInfo{inputs: []regMask{gpsp}, outputs: gponly} + gp11sb = regInfo{inputs: []regMask{gpspsb}, outputs: gponly} + gp21 = 
regInfo{inputs: []regMask{gp, gp}, outputs: gponly} + gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}} + gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} + gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly} + gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} + gp2carry1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} + gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} + gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly} + gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} + gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx} + gp21hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax} + gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax} + gp21mul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}} gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}} gp1flags = regInfo{inputs: []regMask{gpsp}} @@ -181,10 +182,11 @@ func init() { {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true}, // arg0 + arg1 {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint - {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, generates pair - {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates pair - {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags - {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags + {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: 
"ADDL", commutative: true, resultInArg0: true}, // arg0 + arg1, generates pair + {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true}, // arg0 + auxint, generates pair + {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags + {name: "ADCLcarry", argLength: 3, reg: gp2carry1carry, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags, generates pair + {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, // arg0 - arg1 {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint diff --git a/src/cmd/compile/internal/ssa/_gen/ARM.rules b/src/cmd/compile/internal/ssa/_gen/ARM.rules index 18b5d6bba6..b63ca23de1 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/_gen/ARM.rules @@ -6,6 +6,7 @@ (Add(32|64)F ...) => (ADD(F|D) ...) (Add32carry ...) => (ADDS ...) (Add32withcarry ...) => (ADC ...) +(Add32carrywithcarry ...) => (ADCS ...) (Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) => (SUB(F|D) ...) 
diff --git a/src/cmd/compile/internal/ssa/_gen/ARMOps.go b/src/cmd/compile/internal/ssa/_gen/ARMOps.go index 01cd48835e..59bb71b2e3 100644 --- a/src/cmd/compile/internal/ssa/_gen/ARMOps.go +++ b/src/cmd/compile/internal/ssa/_gen/ARMOps.go @@ -102,36 +102,37 @@ func init() { ) // Common regInfo var ( - gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} - gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} - gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}} - gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} - gp1flags = regInfo{inputs: []regMask{gpg}} - gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}} - gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} - gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}} - gp2flags = regInfo{inputs: []regMask{gpg, gpg}} - gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} - gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} - gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} - gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}} - gp3flags = regInfo{inputs: []regMask{gp, gp, gp}} - gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} - gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} - gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} - gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} - gp2store = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} - fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} - fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} - fp1flags = regInfo{inputs: []regMask{fp}} - fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp - gpfp = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")} - fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: 
[]regMask{fp}} - fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} - fp2flags = regInfo{inputs: []regMask{fp, fp}} - fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} - fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} - readflags = regInfo{inputs: nil, outputs: []regMask{gp}} + gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} + gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} + gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}} + gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} + gp1flags = regInfo{inputs: []regMask{gpg}} + gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}} + gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} + gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}} + gp2flags = regInfo{inputs: []regMask{gpg, gpg}} + gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} + gp2flags1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}} + gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} + gp31 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} + gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}} + gp3flags = regInfo{inputs: []regMask{gp, gp, gp}} + gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}} + gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} + gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} + gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}} + gp2store = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} + fp01 = regInfo{inputs: nil, outputs: []regMask{fp}} + fp11 = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}} + fp1flags = regInfo{inputs: []regMask{fp}} + fpgp = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp + gpfp = regInfo{inputs: []regMask{gp}, 
outputs: []regMask{fp}, clobbers: buildReg("F15")} + fp21 = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}} + fp31 = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}} + fp2flags = regInfo{inputs: []regMask{fp, fp}} + fpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}} + fpstore = regInfo{inputs: []regMask{gpspsbg, fp}} + readflags = regInfo{inputs: nil, outputs: []regMask{gp}} ) ops := []opData{ // binary ops @@ -161,16 +162,17 @@ func init() { call: false, // TODO(mdempsky): Should this be true? }, - {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag - {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag - {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags - {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags - {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag - {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag - {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag - {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"}, // arg0 - arg1 - carry, arg2=flags - {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"}, // arg0 - auxInt - carry, arg1=flags - {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"}, // auxInt - arg0 - carry, arg1=flags + {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag + {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag + {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, arg2=flags + {name: "ADCconst", 
argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"}, // arg0 + auxInt + carry, arg1=flags + {name: "ADCS", argLength: 3, reg: gp2flags1carry, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, sets carry + {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"}, // arg0 - arg1, set carry flag + {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag + {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag + {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"}, // arg0 - arg1 - carry, arg2=flags + {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"}, // arg0 - auxInt - carry, arg1=flags + {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"}, // auxInt - arg0 - carry, arg1=flags {name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1 {name: "MULA", argLength: 3, reg: gp31, asm: "MULA"}, // arg0 * arg1 + arg2 diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS.rules b/src/cmd/compile/internal/ssa/_gen/MIPS.rules index 80bf9017f5..fe1e00a4e4 100644 --- a/src/cmd/compile/internal/ssa/_gen/MIPS.rules +++ b/src/cmd/compile/internal/ssa/_gen/MIPS.rules @@ -9,6 +9,12 @@ (Select1 (Add32carry x y)) => (SGTU x (ADD x y)) (Add32withcarry x y c) => (ADD c (ADD x y)) +(Select0 (Add32carrywithcarry x y c)) => (ADD c (ADD x y)) +(Select1 (Add32carrywithcarry x y c)) => + (OR + (SGTU x xy:(ADD x y)) + (SGTU xy (ADD c xy))) + (Sub(Ptr|32|16|8) ...) => (SUB ...) (Sub(32|64)F ...) => (SUB(F|D) ...) diff --git a/src/cmd/compile/internal/ssa/_gen/dec64.rules b/src/cmd/compile/internal/ssa/_gen/dec64.rules index 589c2fcfc1..483818906e 100644 --- a/src/cmd/compile/internal/ssa/_gen/dec64.rules +++ b/src/cmd/compile/internal/ssa/_gen/dec64.rules @@ -6,8 +6,12 @@ // architectures. 
These rules work together with the decomposeBuiltin // pass which handles phis of these typ. +(Last ___) => v.Args[len(v.Args)-1] + (Int64Hi (Int64Make hi _)) => hi (Int64Lo (Int64Make _ lo)) => lo +(Select0 (MakeTuple x y)) => x +(Select1 (MakeTuple x y)) => y (Load ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() => (Int64Make @@ -60,30 +64,85 @@ (Arg {n} [off]) (Arg {n} [off+4])) -(Add64 x y) => - (Int64Make - (Add32withcarry - (Int64Hi x) - (Int64Hi y) - (Select1 (Add32carry (Int64Lo x) (Int64Lo y)))) - (Select0 (Add32carry (Int64Lo x) (Int64Lo y)))) +(Add64 x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + add: (Add32carry x0 y0) + (Int64Make + (Add32withcarry x1 y1 (Select1 add)) + (Select0 add))) + +(Sub64 x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + sub: (Sub32carry x0 y0) + (Int64Make + (Sub32withcarry x1 y1 (Select1 sub)) + (Select0 sub))) + +(Mul64 x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + x0y0: (Mul32uhilo x0 y0) + x0y0Hi: (Select0 x0y0) + x0y0Lo: (Select1 x0y0) + (Int64Make + (Add32 x0y0Hi + (Add32 + (Mul32 x0 y1) + (Mul32 x1 y0))) + x0y0Lo)) + +(Mul64uhilo x y) => + (Last + x0: (Int64Lo x) + x1: (Int64Hi x) + y0: (Int64Lo y) + y1: (Int64Hi y) + x0y0: (Mul32uhilo x0 y0) + x0y1: (Mul32uhilo x0 y1) + x1y0: (Mul32uhilo x1 y0) + x1y1: (Mul32uhilo x1 y1) + x0y0Hi: (Select0 x0y0) + x0y0Lo: (Select1 x0y0) + x0y1Hi: (Select0 x0y1) + x0y1Lo: (Select1 x0y1) + x1y0Hi: (Select0 x1y0) + x1y0Lo: (Select1 x1y0) + x1y1Hi: (Select0 x1y1) + x1y1Lo: (Select1 x1y1) + w1a: (Add32carry x0y0Hi x0y1Lo) + w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 w1a)) + w3a: (Add32withcarry x1y1Hi (Const32 [0]) (Select1 w2a)) + w1b: (Add32carry x1y0Lo (Select0 w1a)) + w2b: (Add32carrywithcarry x1y1Lo (Select0 w2a) (Select1 w1b)) + w3b: (Add32withcarry w3a (Const32 [0]) (Select1 w2b)) + (MakeTuple + (Int64Make w3b (Select0 w2b)) + (Int64Make 
(Select0 w1b) x0y0Lo))) + +(Hmul64u x y) => (Select0 (Mul64uhilo x y)) + +// Hacker's Delight p. 175: signed hmul = unsigned hmul - (x<0)&y - (y<0)&x. +(Hmul64 x y) => + (Last + p: (Hmul64u x y) + xSign: (Int64Make xs:(Rsh32x32 (Int64Hi x) (Const32 [31])) xs) + ySign: (Int64Make ys:(Rsh32x32 (Int64Hi y) (Const32 [31])) ys) + (Sub64 (Sub64 p (And64 xSign y)) (And64 ySign x))) + +// (x+y)/2 => (x-y)/2 + y +(Avg64u x y) => (Add64 (Rsh64Ux32 (Sub64 x y) (Const32 [1])) y) -(Sub64 x y) => - (Int64Make - (Sub32withcarry - (Int64Hi x) - (Int64Hi y) - (Select1 (Sub32carry (Int64Lo x) (Int64Lo y)))) - (Select0 (Sub32carry (Int64Lo x) (Int64Lo y)))) - -(Mul64 x y) => - (Int64Make - (Add32 - (Mul32 (Int64Lo x) (Int64Hi y)) - (Add32 - (Mul32 (Int64Hi x) (Int64Lo y)) - (Select0 (Mul32uhilo (Int64Lo x) (Int64Lo y))))) - (Select1 (Mul32uhilo (Int64Lo x) (Int64Lo y)))) (And64 x y) => (Int64Make diff --git a/src/cmd/compile/internal/ssa/_gen/divmod.rules b/src/cmd/compile/internal/ssa/_gen/divmod.rules index c7c9e13209..21e0a19406 100644 --- a/src/cmd/compile/internal/ssa/_gen/divmod.rules +++ b/src/cmd/compile/internal/ssa/_gen/divmod.rules @@ -118,7 +118,7 @@ (Hmul32 x (Const32 [int32(smagic32(c).m/2)])) (Const64 [smagic32(c).s - 1])) (Rsh32x64 x (Const64 [31]))) -(Div64 x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul => +(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul => (Sub64 (Rsh64x64 (Hmul64 x (Const64 [int64(smagic64(c).m/2)])) @@ -132,7 +132,7 @@ (Add32 x (Hmul32 x (Const32 [int32(smagic32(c).m)]))) (Const64 [smagic32(c).s])) (Rsh32x64 x (Const64 [31]))) -(Div64 x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul => +(Div64 x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul => (Sub64 (Rsh64x64 (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)]))) @@ -153,7 +153,7 @@ (Rsh32Ux64 (Hmul32u x (Const32 
[int32(smagic32(c).m)])) (Const64 [smagic32(c).s])) -(Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul => +(Div64u x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul => (Rsh64Ux64 (Hmul64u x (Const64 [int64(smagic64(c).m)])) (Const64 [smagic64(c).s])) @@ -185,7 +185,7 @@ (Rsh32Ux64 (Hmul32u x (Const32 [int32(1<<31 + umagic32(c).m/2)])) (Const64 [umagic32(c).s - 1])) -(Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul => +(Div64u x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul => (Rsh64Ux64 (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)])) (Const64 [umagic64(c).s - 1])) @@ -211,7 +211,7 @@ (Rsh32Ux64 x (Const64 [1])) (Const32 [int32(1<<31 + (umagic32(c).m+1)/2)])) (Const64 [umagic32(c).s - 2])) -(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul => +(Div64u x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul => (Rsh64Ux64 (Hmul64u (Rsh64Ux64 x (Const64 [1])) @@ -237,52 +237,7 @@ (Rsh32Ux64 (Avg32u x (Hmul32u x (Const32 [int32(umagic32(c).m)]))) (Const64 [umagic32(c).s - 1])) -(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul => +(Div64u x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul => (Rsh64Ux64 (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)]))) (Const64 [umagic64(c).s - 1])) - -// Case 9. For unsigned 64-bit divides on 32-bit machines, -// if the constant fits in 16 bits (so that the last term -// fits in 32 bits), convert to three 32-bit divides by a constant. 
-// -// If 1<<32 = Q * c + R -// and x = hi << 32 + lo -// -// Then x = (hi/c*c + hi%c) << 32 + lo -// = hi/c*c<<32 + hi%c<<32 + lo -// = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c -// = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c) -// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c -(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul => - (Add64 - (Add64 - (Add64 - (Lsh64x64 - (ZeroExt32to64 - (Div32u - (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) - (Const32 [int32(c)]))) - (Const64 [32])) - (ZeroExt32to64 (Div32u (Trunc64to32 x) (Const32 [int32(c)])))) - (Mul64 - (ZeroExt32to64 - (Mod32u - (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) - (Const32 [int32(c)]))) - (Const64 [int64((1<<32)/c)]))) - (ZeroExt32to64 - (Div32u - (Add32 - (Mod32u (Trunc64to32 x) (Const32 [int32(c)])) - (Mul32 - (Mod32u - (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) - (Const32 [int32(c)])) - (Const32 [int32((1<<32)%c)]))) - (Const32 [int32(c)])))) - -// Repeated from generic.rules, for expanding the expression above -// (which can then be further expanded to handle the nested Div32u). 
-(Mod32u x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c) - => (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules index 3f02644832..7e3aba1e5e 100644 --- a/src/cmd/compile/internal/ssa/_gen/generic.rules +++ b/src/cmd/compile/internal/ssa/_gen/generic.rules @@ -1106,13 +1106,13 @@ => (Sub32 x (Mul32 (Div32 x (Const32 [c])) (Const32 [c]))) (Mod64 x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63) => (Sub64 x (Mul64 (Div64 x (Const64 [c])) (Const64 [c]))) -(Mod8u x (Const8 [c])) && x.Op != OpConst8 && c > 0 && umagicOK8( c) +(Mod8u x (Const8 [c])) && x.Op != OpConst8 && c != 0 => (Sub8 x (Mul8 (Div8u x (Const8 [c])) (Const8 [c]))) -(Mod16u x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c) +(Mod16u x (Const16 [c])) && x.Op != OpConst16 && c != 0 => (Sub16 x (Mul16 (Div16u x (Const16 [c])) (Const16 [c]))) -(Mod32u x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c) +(Mod32u x (Const32 [c])) && x.Op != OpConst32 && c != 0 => (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) -(Mod64u x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c) +(Mod64u x (Const64 [c])) && x.Op != OpConst64 && c != 0 => (Sub64 x (Mul64 (Div64u x (Const64 [c])) (Const64 [c]))) // Set up for mod->mul+rot optimization in genericlateopt.rules. diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go index 1f6ad4e16d..09fb4bf03f 100644 --- a/src/cmd/compile/internal/ssa/_gen/genericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go @@ -16,6 +16,9 @@ package main // are signed or unsigned. var genericOps = []opData{ + // Pseudo-op. + {name: "Last", argLength: -1}, // return last element of tuple; for "let" bindings + // 2-input arithmetic // Types must be consistent with Go typing. 
Add, for example, must take two values // of the same type and produces that same type. @@ -557,8 +560,9 @@ var genericOps = []opData{ {name: "Int64Hi", argLength: 1, typ: "UInt32"}, // high 32-bit of arg0 {name: "Int64Lo", argLength: 1, typ: "UInt32"}, // low 32-bit of arg0 - {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry) - {name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1) + {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry) + {name: "Add32withcarry", argLength: 3, commutative: true}, // arg0 + arg1 + arg2, arg2=carry (0 or 1) + {name: "Add32carrywithcarry", argLength: 3, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1 + arg2, arg2=carry, returns (value, carry) {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry) {name: "Sub32withcarry", argLength: 3}, // arg0 - arg1 - arg2, arg2=carry (0 or 1) diff --git a/src/cmd/compile/internal/ssa/_gen/rulegen.go b/src/cmd/compile/internal/ssa/_gen/rulegen.go index f818b46511..e3a10707fe 100644 --- a/src/cmd/compile/internal/ssa/_gen/rulegen.go +++ b/src/cmd/compile/internal/ssa/_gen/rulegen.go @@ -1271,8 +1271,10 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s case 0: case 1: rr.add(stmtf("%s.AddArg(%s)", v, all.String())) - default: + case 2, 3, 4, 5, 6: rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String())) + default: + rr.add(stmtf("%s.AddArgs(%s)", v, all.String())) } if cse != nil { @@ -1313,6 +1315,12 @@ outer: d++ case d > 0 && s[i] == close: d-- + case s[i] == ':': + // ignore spaces after colons + nonsp = true + for i+1 < len(s) && (s[i+1] == ' ' || s[i+1] == '\t') { + i++ + } default: nonsp = true } @@ -1347,7 +1355,7 @@ func extract(val string) (op, typ, auxint, aux string, args []string) { val = val[1 : len(val)-1] // remove () // 
Split val up into regions. - // Split by spaces/tabs, except those contained in (), {}, [], or <>. + // Split by spaces/tabs, except those contained in (), {}, [], or <> or after colon. s := split(val) // Extract restrictions and args. @@ -1471,7 +1479,7 @@ func splitNameExpr(arg string) (name, expr string) { // colon is inside the parens, such as in "(Foo x:(Bar))". return "", arg } - return arg[:colon], arg[colon+1:] + return arg[:colon], strings.TrimSpace(arg[colon+1:]) } func getBlockInfo(op string, arch arch) (name string, data blockData) { diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 16a983a568..264f4b3bf3 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -386,6 +386,7 @@ const ( Op386ADDLcarry Op386ADDLconstcarry Op386ADCL + Op386ADCLcarry Op386ADCLconst Op386SUBL Op386SUBLconst @@ -1182,6 +1183,7 @@ const ( OpARMADDSconst OpARMADC OpARMADCconst + OpARMADCS OpARMSUBS OpARMSUBSconst OpARMRSBSconst @@ -3010,6 +3012,7 @@ const ( OpWasmI64Rotl OpWasmI64Popcnt + OpLast OpAdd8 OpAdd16 OpAdd32 @@ -3336,6 +3339,7 @@ const ( OpInt64Lo OpAdd32carry OpAdd32withcarry + OpAdd32carrywithcarry OpSub32carry OpSub32withcarry OpAdd64carry @@ -3968,6 +3972,24 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADCLcarry", + argLen: 3, + commutative: true, + resultInArg0: true, + clobberFlags: true, + asm: x86.AADCL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 239}, // AX CX DX BX BP SI DI + {1, 239}, // AX CX DX BX BP SI DI + }, + outputs: []outputInfo{ + {1, 0}, + {0, 239}, // AX CX DX BX BP SI DI + }, + }, + }, { name: "ADCLconst", auxType: auxInt32, @@ -15792,6 +15814,22 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ADCS", + argLen: 3, + commutative: true, + asm: arm.AADC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + {1, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + outputs: []outputInfo{ + {1, 0}, + 
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14 + }, + }, + }, { name: "SUBS", argLen: 2, @@ -40672,6 +40710,11 @@ var opcodeTable = [...]opInfo{ }, }, + { + name: "Last", + argLen: -1, + generic: true, + }, { name: "Add8", argLen: 2, @@ -42480,6 +42523,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "Add32carrywithcarry", + argLen: 3, + commutative: true, + generic: true, + }, { name: "Sub32carry", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go index 0495438710..be88dd3cdd 100644 --- a/src/cmd/compile/internal/ssa/rewrite386.go +++ b/src/cmd/compile/internal/ssa/rewrite386.go @@ -257,6 +257,9 @@ func rewriteValue386(v *Value) bool { case OpAdd32carry: v.Op = Op386ADDLcarry return true + case OpAdd32carrywithcarry: + v.Op = Op386ADCLcarry + return true case OpAdd32withcarry: v.Op = Op386ADCL return true diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go index 44380cf8f5..2a90e7b433 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM.go +++ b/src/cmd/compile/internal/ssa/rewriteARM.go @@ -446,6 +446,9 @@ func rewriteValueARM(v *Value) bool { case OpAdd32carry: v.Op = OpARMADDS return true + case OpAdd32carrywithcarry: + v.Op = OpARMADCS + return true case OpAdd32withcarry: v.Op = OpARMADC return true diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go index fda02e64d1..ff696337ef 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS.go @@ -6562,6 +6562,23 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool { v.AddArg2(x, y) return true } + // match: (Select0 (Add32carrywithcarry x y c)) + // result: (ADD c (ADD x y)) + for { + if v_0.Op != OpAdd32carrywithcarry { + break + } + t := v_0.Type + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpMIPSADD) + v.Type = t.FieldType(0) + v0 := 
b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0)) + v0.AddArg2(x, y) + v.AddArg2(c, v0) + return true + } // match: (Select0 (Sub32carry x y)) // result: (SUB x y) for { @@ -6759,6 +6776,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool { v.AddArg2(x, v0) return true } + // match: (Select1 (Add32carrywithcarry x y c)) + // result: (OR (SGTU x xy:(ADD x y)) (SGTU xy (ADD c xy))) + for { + if v_0.Op != OpAdd32carrywithcarry { + break + } + t := v_0.Type + c := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + v.reset(OpMIPSOR) + v.Type = typ.Bool + v0 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool) + xy := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0)) + xy.AddArg2(x, y) + v0.AddArg2(x, xy) + v2 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool) + v3 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0)) + v3.AddArg2(c, xy) + v2.AddArg2(xy, v3) + v.AddArg2(v0, v2) + return true + } // match: (Select1 (Sub32carry x y)) // result: (SGTU (SUB x y) x) for { diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go index b4da78fd52..a0388551b5 100644 --- a/src/cmd/compile/internal/ssa/rewritedec64.go +++ b/src/cmd/compile/internal/ssa/rewritedec64.go @@ -12,6 +12,8 @@ func rewriteValuedec64(v *Value) bool { return rewriteValuedec64_OpAnd64(v) case OpArg: return rewriteValuedec64_OpArg(v) + case OpAvg64u: + return rewriteValuedec64_OpAvg64u(v) case OpBitLen64: return rewriteValuedec64_OpBitLen64(v) case OpBswap64: @@ -27,10 +29,16 @@ func rewriteValuedec64(v *Value) bool { return true case OpEq64: return rewriteValuedec64_OpEq64(v) + case OpHmul64: + return rewriteValuedec64_OpHmul64(v) + case OpHmul64u: + return rewriteValuedec64_OpHmul64u(v) case OpInt64Hi: return rewriteValuedec64_OpInt64Hi(v) case OpInt64Lo: return rewriteValuedec64_OpInt64Lo(v) + case OpLast: + return rewriteValuedec64_OpLast(v) case OpLeq64: return rewriteValuedec64_OpLeq64(v) case OpLeq64U: @@ -57,6 +65,8 @@ func rewriteValuedec64(v *Value) bool { return 
rewriteValuedec64_OpLsh8x64(v) case OpMul64: return rewriteValuedec64_OpMul64(v) + case OpMul64uhilo: + return rewriteValuedec64_OpMul64uhilo(v) case OpNeg64: return rewriteValuedec64_OpNeg64(v) case OpNeq64: @@ -101,6 +111,10 @@ func rewriteValuedec64(v *Value) bool { return rewriteValuedec64_OpRsh8Ux64(v) case OpRsh8x64: return rewriteValuedec64_OpRsh8x64(v) + case OpSelect0: + return rewriteValuedec64_OpSelect0(v) + case OpSelect1: + return rewriteValuedec64_OpSelect1(v) case OpSignExt16to64: return rewriteValuedec64_OpSignExt16to64(v) case OpSignExt32to64: @@ -133,29 +147,33 @@ func rewriteValuedec64_OpAdd64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Add64 x y) - // result: (Int64Make (Add32withcarry (Int64Hi x) (Int64Hi y) (Select1 (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 (Add32carry (Int64Lo x) (Int64Lo y)))) + // match: (Add64 x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) add: (Add32carry x0 y0) (Int64Make (Add32withcarry x1 y1 (Select1 add)) (Select0 add))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpInt64Make) - v0 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.Int32) - v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v2.AddArg(y) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) - v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v5.AddArg(x) - v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v6.AddArg(y) - v4.AddArg2(v5, v6) - v3.AddArg(v4) - v0.AddArg3(v1, v2, v3) - v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) - v7.AddArg(v4) - v.AddArg2(v0, v7) + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) + y1 := b.NewValue0(v.Pos, 
OpInt64Hi, typ.UInt32) + y1.AddArg(y) + add := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + add.AddArg2(x0, y0) + v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v6 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v7.AddArg(add) + v6.AddArg3(x1, y1, v7) + v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v8.AddArg(add) + v5.AddArg2(v6, v8) + v.AddArg6(x0, x1, y0, y1, add, v5) return true } } @@ -268,6 +286,28 @@ func rewriteValuedec64_OpArg(v *Value) bool { } return false } +func rewriteValuedec64_OpAvg64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Avg64u x y) + // result: (Add64 (Rsh64Ux32 (Sub64 x y) (Const32 [1])) y) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpAdd64) + v0 := b.NewValue0(v.Pos, OpRsh64Ux32, t) + v1 := b.NewValue0(v.Pos, OpSub64, t) + v1.AddArg2(x, y) + v2 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v2.AuxInt = int32ToAuxInt(1) + v0.AddArg2(v1, v2) + v.AddArg2(v0, y) + return true + } +} func rewriteValuedec64_OpBitLen64(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -430,6 +470,62 @@ func rewriteValuedec64_OpEq64(v *Value) bool { return true } } +func rewriteValuedec64_OpHmul64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Hmul64 x y) + // result: (Last p: (Hmul64u x y) xSign: (Int64Make xs:(Rsh32x32 (Int64Hi x) (Const32 [31])) xs) ySign: (Int64Make ys:(Rsh32x32 (Int64Hi y) (Const32 [31])) ys) (Sub64 (Sub64 p (And64 xSign y)) (And64 ySign x))) + for { + x := v_0 + y := v_1 + v.reset(OpLast) + p := b.NewValue0(v.Pos, OpHmul64u, typ.UInt64) + p.AddArg2(x, y) + xSign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + xs := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) + v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v3.AddArg(x) + v4 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v4.AuxInt = 
int32ToAuxInt(31) + xs.AddArg2(v3, v4) + xSign.AddArg2(xs, xs) + ySign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + ys := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + v7.AddArg(y) + ys.AddArg2(v7, v4) + ySign.AddArg2(ys, ys) + v8 := b.NewValue0(v.Pos, OpSub64, typ.Int64) + v9 := b.NewValue0(v.Pos, OpSub64, typ.Int64) + v10 := b.NewValue0(v.Pos, OpAnd64, typ.Int64) + v10.AddArg2(xSign, y) + v9.AddArg2(p, v10) + v11 := b.NewValue0(v.Pos, OpAnd64, typ.Int64) + v11.AddArg2(ySign, x) + v8.AddArg2(v9, v11) + v.AddArg4(p, xSign, ySign, v8) + return true + } +} +func rewriteValuedec64_OpHmul64u(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + typ := &b.Func.Config.Types + // match: (Hmul64u x y) + // result: (Select0 (Mul64uhilo x y)) + for { + x := v_0 + y := v_1 + v.reset(OpSelect0) + v0 := b.NewValue0(v.Pos, OpMul64uhilo, types.NewTuple(typ.UInt64, typ.UInt64)) + v0.AddArg2(x, y) + v.AddArg(v0) + return true + } +} func rewriteValuedec64_OpInt64Hi(v *Value) bool { v_0 := v.Args[0] // match: (Int64Hi (Int64Make hi _)) @@ -458,6 +554,14 @@ func rewriteValuedec64_OpInt64Lo(v *Value) bool { } return false } +func rewriteValuedec64_OpLast(v *Value) bool { + // match: (Last ___) + // result: v.Args[len(v.Args)-1] + for { + v.copyOf(v.Args[len(v.Args)-1]) + return true + } +} func rewriteValuedec64_OpLeq64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -1114,35 +1218,124 @@ func rewriteValuedec64_OpMul64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Mul64 x y) - // result: (Int64Make (Add32 (Mul32 (Int64Lo x) (Int64Hi y)) (Add32 (Mul32 (Int64Hi x) (Int64Lo y)) (Select0 (Mul32uhilo (Int64Lo x) (Int64Lo y))))) (Select1 (Mul32uhilo (Int64Lo x) (Int64Lo y)))) + // match: (Mul64 x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y0Hi: (Select0 x0y0) x0y0Lo: (Select1 x0y0) (Int64Make (Add32 
x0y0Hi (Add32 (Mul32 x0 y1) (Mul32 x1 y0))) x0y0Lo)) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpInt64Make) - v0 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) - v1 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) - v2 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v2.AddArg(x) - v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v3.AddArg(y) - v1.AddArg2(v2, v3) - v4 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) - v6 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v6.AddArg(x) - v7 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v7.AddArg(y) - v5.AddArg2(v6, v7) - v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) - v9 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) - v9.AddArg2(v2, v7) - v8.AddArg(v9) - v4.AddArg2(v5, v8) - v0.AddArg2(v1, v4) - v10 := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) - v10.AddArg(v9) - v.AddArg2(v0, v10) + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) + y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + y1.AddArg(y) + x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x0y0.AddArg2(x0, y0) + x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x0y0Hi.AddArg(x0y0) + x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x0y0Lo.AddArg(x0y0) + v7 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v8 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) + v9 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) + v10 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) + v10.AddArg2(x0, y1) + v11 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) + v11.AddArg2(x1, y0) + v9.AddArg2(v10, v11) + v8.AddArg2(x0y0Hi, v9) + v7.AddArg2(v8, x0y0Lo) + v.AddArgs(x0, x1, y0, y1, x0y0, x0y0Hi, x0y0Lo, v7) + return true + } +} +func rewriteValuedec64_OpMul64uhilo(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + b := 
v.Block + typ := &b.Func.Config.Types + // match: (Mul64uhilo x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y1: (Mul32uhilo x0 y1) x1y0: (Mul32uhilo x1 y0) x1y1: (Mul32uhilo x1 y1) x0y0Hi: (Select0 x0y0) x0y0Lo: (Select1 x0y0) x0y1Hi: (Select0 x0y1) x0y1Lo: (Select1 x0y1) x1y0Hi: (Select0 x1y0) x1y0Lo: (Select1 x1y0) x1y1Hi: (Select0 x1y1) x1y1Lo: (Select1 x1y1) w1a: (Add32carry x0y0Hi x0y1Lo) w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 w1a)) w3a: (Add32withcarry x1y1Hi (Const32 [0]) (Select1 w2a)) w1b: (Add32carry x1y0Lo (Select0 w1a)) w2b: (Add32carrywithcarry x1y1Lo (Select0 w2a) (Select1 w1b)) w3b: (Add32withcarry w3a (Const32 [0]) (Select1 w2b)) (MakeTuple (Int64Make w3b (Select0 w2b)) (Int64Make (Select0 w1b) x0y0Lo))) + for { + t := v.Type + x := v_0 + y := v_1 + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) + y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + y1.AddArg(y) + x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x0y0.AddArg2(x0, y0) + x0y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x0y1.AddArg2(x0, y1) + x1y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x1y0.AddArg2(x1, y0) + x1y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32)) + x1y1.AddArg2(x1, y1) + x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x0y0Hi.AddArg(x0y0) + x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x0y0Lo.AddArg(x0y0) + x0y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x0y1Hi.AddArg(x0y1) + x0y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x0y1Lo.AddArg(x0y1) + x1y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x1y0Hi.AddArg(x1y0) + x1y0Lo := b.NewValue0(v.Pos, OpSelect1, 
typ.UInt32) + x1y0Lo.AddArg(x1y0) + x1y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + x1y1Hi.AddArg(x1y1) + x1y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32) + x1y1Lo.AddArg(x1y1) + w1a := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + w1a.AddArg2(x0y0Hi, x0y1Lo) + w2a := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags)) + v18 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v18.AddArg(w1a) + w2a.AddArg3(x0y1Hi, x1y0Hi, v18) + w3a := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32) + v20 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) + v20.AuxInt = int32ToAuxInt(0) + v21 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v21.AddArg(w2a) + w3a.AddArg3(x1y1Hi, v20, v21) + w1b := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + v23 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v23.AddArg(w1a) + w1b.AddArg2(x1y0Lo, v23) + w2b := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags)) + v25 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v25.AddArg(w2a) + v26 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v26.AddArg(w1b) + w2b.AddArg3(x1y1Lo, v25, v26) + w3b := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32) + v28 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v28.AddArg(w2b) + w3b.AddArg3(w3a, v20, v28) + v29 := b.NewValue0(v.Pos, OpMakeTuple, types.NewTuple(typ.UInt64, typ.UInt64)) + v30 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v31 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v31.AddArg(w2b) + v30.AddArg2(w3b, v31) + v32 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v33 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v33.AddArg(w1b) + v32.AddArg2(v33, x0y0Lo) + v29.AddArg2(v30, v32) + v.AddArgs(x0, x1, y0, y1, x0y0, x0y1, x1y0, x1y1, x0y0Hi, x0y0Lo, x0y1Hi, x0y1Lo, x1y0Hi, x1y0Lo, x1y1Hi, x1y1Lo, w1a, w2a, w3a, w1b, w2b, w3b, v29) return true } } @@ -2705,6 +2898,34 @@ func rewriteValuedec64_OpRsh8x64(v 
*Value) bool { return true } } +func rewriteValuedec64_OpSelect0(v *Value) bool { + v_0 := v.Args[0] + // match: (Select0 (MakeTuple x y)) + // result: x + for { + if v_0.Op != OpMakeTuple { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValuedec64_OpSelect1(v *Value) bool { + v_0 := v.Args[0] + // match: (Select1 (MakeTuple x y)) + // result: y + for { + if v_0.Op != OpMakeTuple { + break + } + y := v_0.Args[1] + v.copyOf(y) + return true + } + return false +} func rewriteValuedec64_OpSignExt16to64(v *Value) bool { v_0 := v.Args[0] b := v.Block @@ -2815,29 +3036,33 @@ func rewriteValuedec64_OpSub64(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (Sub64 x y) - // result: (Int64Make (Sub32withcarry (Int64Hi x) (Int64Hi y) (Select1 (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 (Sub32carry (Int64Lo x) (Int64Lo y)))) + // match: (Sub64 x y) + // result: (Last x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) sub: (Sub32carry x0 y0) (Int64Make (Sub32withcarry x1 y1 (Select1 sub)) (Select0 sub))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpInt64Make) - v0 := b.NewValue0(v.Pos, OpSub32withcarry, typ.Int32) - v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) - v2.AddArg(y) - v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) - v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v5.AddArg(x) - v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) - v6.AddArg(y) - v4.AddArg2(v5, v6) - v3.AddArg(v4) - v0.AddArg3(v1, v2, v3) - v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) - v7.AddArg(v4) - v.AddArg2(v0, v7) + v.reset(OpLast) + v.Type = t + x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + x0.AddArg(x) + x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + x1.AddArg(x) + y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32) + y0.AddArg(y) 
+ y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32) + y1.AddArg(y) + sub := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags)) + sub.AddArg2(x0, y0) + v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64) + v6 := b.NewValue0(v.Pos, OpSub32withcarry, typ.UInt32) + v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v7.AddArg(sub) + v6.AddArg3(x1, y1, v7) + v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32) + v8.AddArg(sub) + v5.AddArg2(v6, v8) + v.AddArg6(x0, x1, y0, y1, sub, v5) return true } } diff --git a/src/cmd/compile/internal/ssa/rewritedivmod.go b/src/cmd/compile/internal/ssa/rewritedivmod.go index fc37d84999..02978075a8 100644 --- a/src/cmd/compile/internal/ssa/rewritedivmod.go +++ b/src/cmd/compile/internal/ssa/rewritedivmod.go @@ -20,8 +20,6 @@ func rewriteValuedivmod(v *Value) bool { return rewriteValuedivmod_OpDiv8(v) case OpDiv8u: return rewriteValuedivmod_OpDiv8u(v) - case OpMod32u: - return rewriteValuedivmod_OpMod32u(v) } return false } @@ -646,7 +644,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { return true } // match: (Div64 x (Const64 [c])) - // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul + // cond: smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul // result: (Sub64 (Rsh64x64 (Hmul64 x (Const64 [int64(smagic64(c).m/2)])) (Const64 [smagic64(c).s - 1])) (Rsh64x64 x (Const64 [63]))) for { t := v.Type @@ -655,7 +653,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul) { + if !(smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul) { break } v.reset(OpSub64) @@ -676,7 +674,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { return true } // match: (Div64 x (Const64 [c])) - // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul + // cond: smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul // result: (Sub64 
(Rsh64x64 (Add64 x (Hmul64 x (Const64 [int64(smagic64(c).m)]))) (Const64 [smagic64(c).s])) (Rsh64x64 x (Const64 [63]))) for { t := v.Type @@ -685,7 +683,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul) { + if !(smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul) { break } v.reset(OpSub64) @@ -716,7 +714,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { config := b.Func.Config typ := &b.Func.Config.Types // match: (Div64u x (Const64 [c])) - // cond: t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul + // cond: t.IsSigned() && smagicOK64(c) && config.useHmul // result: (Rsh64Ux64 (Hmul64u x (Const64 [int64(smagic64(c).m)])) (Const64 [smagic64(c).s])) for { t := v.Type @@ -725,7 +723,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul) { + if !(t.IsSigned() && smagicOK64(c) && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -740,7 +738,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { return true } // match: (Div64u x (Const64 [c])) - // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul + // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul // result: (Rsh64Ux64 (Hmul64u x (Const64 [int64(1<<63 + umagic64(c).m/2)])) (Const64 [umagic64(c).s - 1])) for { t := v.Type @@ -749,7 +747,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul) { + if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -764,7 +762,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { return true } // match: (Div64u x (Const64 [c])) - // cond: umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && 
config.useHmul + // cond: umagicOK64(c) && c&1 == 0 && config.useHmul // result: (Rsh64Ux64 (Hmul64u (Rsh64Ux64 x (Const64 [1])) (Const64 [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 [umagic64(c).s - 2])) for { t := v.Type @@ -773,7 +771,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul) { + if !(umagicOK64(c) && c&1 == 0 && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -792,7 +790,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { return true } // match: (Div64u x (Const64 [c])) - // cond: umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul + // cond: umagicOK64(c) && config.useAvg && config.useHmul // result: (Rsh64Ux64 (Avg64u x (Hmul64u x (Const64 [int64(umagic64(c).m)]))) (Const64 [umagic64(c).s - 1])) for { t := v.Type @@ -801,7 +799,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul) { + if !(umagicOK64(c) && config.useAvg && config.useHmul) { break } v.reset(OpRsh64Ux64) @@ -817,66 +815,6 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool { v.AddArg2(v0, v3) return true } - // match: (Div64u x (Const64 [c])) - // cond: c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul - // result: (Add64 (Add64 (Add64 (Lsh64x64 (ZeroExt32to64 (Div32u (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) (Const32 [int32(c)]))) (Const64 [32])) (ZeroExt32to64 (Div32u (Trunc64to32 x) (Const32 [int32(c)])))) (Mul64 (ZeroExt32to64 (Mod32u (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) (Const32 [int32(c)]))) (Const64 [int64((1<<32)/c)]))) (ZeroExt32to64 (Div32u (Add32 (Mod32u (Trunc64to32 x) (Const32 [int32(c)])) (Mul32 (Mod32u (Trunc64to32 (Rsh64Ux64 x (Const64 [32]))) (Const32 [int32(c)])) (Const32 [int32((1<<32)%c)]))) (Const32 [int32(c)])))) - for { - x := v_0 - if v_1.Op != OpConst64 { - 
break - } - c := auxIntToInt64(v_1.AuxInt) - if !(c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul) { - break - } - v.reset(OpAdd64) - v0 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64) - v1 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64) - v2 := b.NewValue0(v.Pos, OpLsh64x64, typ.UInt64) - v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v4 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32) - v5 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32) - v6 := b.NewValue0(v.Pos, OpRsh64Ux64, typ.UInt64) - v7 := b.NewValue0(v.Pos, OpConst64, typ.UInt64) - v7.AuxInt = int64ToAuxInt(32) - v6.AddArg2(x, v7) - v5.AddArg(v6) - v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v8.AuxInt = int32ToAuxInt(int32(c)) - v4.AddArg2(v5, v8) - v3.AddArg(v4) - v2.AddArg2(v3, v7) - v9 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v10 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32) - v11 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32) - v11.AddArg(x) - v10.AddArg2(v11, v8) - v9.AddArg(v10) - v1.AddArg2(v2, v9) - v12 := b.NewValue0(v.Pos, OpMul64, typ.UInt64) - v13 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v14 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32) - v14.AddArg2(v5, v8) - v13.AddArg(v14) - v15 := b.NewValue0(v.Pos, OpConst64, typ.UInt64) - v15.AuxInt = int64ToAuxInt(int64((1 << 32) / c)) - v12.AddArg2(v13, v15) - v0.AddArg2(v1, v12) - v16 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v17 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32) - v18 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32) - v19 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32) - v19.AddArg2(v11, v8) - v20 := b.NewValue0(v.Pos, OpMul32, typ.UInt32) - v21 := b.NewValue0(v.Pos, OpConst32, typ.UInt32) - v21.AuxInt = int32ToAuxInt(int32((1 << 32) % c)) - v20.AddArg2(v14, v21) - v18.AddArg2(v19, v20) - v17.AddArg2(v18, v8) - v16.AddArg(v17) - v.AddArg2(v0, v16) - return true - } return false } func rewriteValuedivmod_OpDiv8(v *Value) bool { @@ -982,35 +920,6 @@ func rewriteValuedivmod_OpDiv8u(v 
*Value) bool { } return false } -func rewriteValuedivmod_OpMod32u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (Mod32u x (Const32 [c])) - // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c) - // result: (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) - for { - t := v.Type - x := v_0 - if v_1.Op != OpConst32 { - break - } - c := auxIntToInt32(v_1.AuxInt) - if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) { - break - } - v.reset(OpSub32) - v0 := b.NewValue0(v.Pos, OpMul32, t) - v1 := b.NewValue0(v.Pos, OpDiv32u, t) - v2 := b.NewValue0(v.Pos, OpConst32, t) - v2.AuxInt = int32ToAuxInt(c) - v1.AddArg2(x, v2) - v0.AddArg2(v1, v2) - v.AddArg2(x, v0) - return true - } - return false -} func rewriteBlockdivmod(b *Block) bool { return false } diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go index 891f017d7b..fd5139c0bb 100644 --- a/src/cmd/compile/internal/ssa/rewritegeneric.go +++ b/src/cmd/compile/internal/ssa/rewritegeneric.go @@ -14724,7 +14724,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool { return true } // match: (Mod16u x (Const16 [c])) - // cond: x.Op != OpConst16 && c > 0 && umagicOK16(c) + // cond: x.Op != OpConst16 && c != 0 // result: (Sub16 x (Mul16 (Div16u x (Const16 [c])) (Const16 [c]))) for { t := v.Type @@ -14733,7 +14733,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool { break } c := auxIntToInt16(v_1.AuxInt) - if !(x.Op != OpConst16 && c > 0 && umagicOK16(c)) { + if !(x.Op != OpConst16 && c != 0) { break } v.reset(OpSub16) @@ -14878,7 +14878,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool { return true } // match: (Mod32u x (Const32 [c])) - // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c) + // cond: x.Op != OpConst32 && c != 0 // result: (Sub32 x (Mul32 (Div32u x (Const32 [c])) (Const32 [c]))) for { t := v.Type @@ -14887,7 +14887,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool { break } c := auxIntToInt32(v_1.AuxInt) - if 
!(x.Op != OpConst32 && c > 0 && umagicOK32(c)) { + if !(x.Op != OpConst32 && c != 0) { break } v.reset(OpSub32) @@ -15043,7 +15043,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool { return true } // match: (Mod64u x (Const64 [c])) - // cond: x.Op != OpConst64 && c > 0 && umagicOK64(c) + // cond: x.Op != OpConst64 && c != 0 // result: (Sub64 x (Mul64 (Div64u x (Const64 [c])) (Const64 [c]))) for { t := v.Type @@ -15052,7 +15052,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool { break } c := auxIntToInt64(v_1.AuxInt) - if !(x.Op != OpConst64 && c > 0 && umagicOK64(c)) { + if !(x.Op != OpConst64 && c != 0) { break } v.reset(OpSub64) @@ -15197,7 +15197,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool { return true } // match: (Mod8u x (Const8 [c])) - // cond: x.Op != OpConst8 && c > 0 && umagicOK8( c) + // cond: x.Op != OpConst8 && c != 0 // result: (Sub8 x (Mul8 (Div8u x (Const8 [c])) (Const8 [c]))) for { t := v.Type @@ -15206,7 +15206,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool { break } c := auxIntToInt8(v_1.AuxInt) - if !(x.Op != OpConst8 && c > 0 && umagicOK8(c)) { + if !(x.Op != OpConst8 && c != 0) { break } v.reset(OpSub8) diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go index 06887c934e..bf9e71c170 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics.go +++ b/src/cmd/compile/internal/ssagen/intrinsics.go @@ -1223,7 +1223,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) { func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1]) }, - sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64) + sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.PPC64, sys.S390X, sys.MIPS64, sys.MIPS, sys.RISCV64, sys.Loong64) alias("math/bits", "Mul", "math/bits", "Mul64", p8...) alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...) 
addF("math/bits", "Add64", diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go index 5a4e577fb6..9311f84345 100644 --- a/src/cmd/compile/internal/ssagen/intrinsics_test.go +++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go @@ -33,6 +33,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"386", "internal/runtime/sys", "TrailingZeros64"}: struct{}{}, {"386", "internal/runtime/sys", "TrailingZeros8"}: struct{}{}, {"386", "math", "sqrt"}: struct{}{}, + {"386", "math/bits", "Mul64"}: struct{}{}, {"386", "math/bits", "ReverseBytes32"}: struct{}{}, {"386", "math/bits", "ReverseBytes64"}: struct{}{}, {"386", "math/bits", "TrailingZeros16"}: struct{}{}, @@ -208,6 +209,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"arm", "math/bits", "Len32"}: struct{}{}, {"arm", "math/bits", "Len64"}: struct{}{}, {"arm", "math/bits", "Len8"}: struct{}{}, + {"arm", "math/bits", "Mul64"}: struct{}{}, {"arm", "math/bits", "ReverseBytes32"}: struct{}{}, {"arm", "math/bits", "ReverseBytes64"}: struct{}{}, {"arm", "math/bits", "RotateLeft32"}: struct{}{}, @@ -557,6 +559,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mips", "math/bits", "Len32"}: struct{}{}, {"mips", "math/bits", "Len64"}: struct{}{}, {"mips", "math/bits", "Len8"}: struct{}{}, + {"mips", "math/bits", "Mul64"}: struct{}{}, {"mips", "math/bits", "TrailingZeros16"}: struct{}{}, {"mips", "math/bits", "TrailingZeros32"}: struct{}{}, {"mips", "math/bits", "TrailingZeros64"}: struct{}{}, @@ -806,6 +809,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{ {"mipsle", "math/bits", "Len32"}: struct{}{}, {"mipsle", "math/bits", "Len64"}: struct{}{}, {"mipsle", "math/bits", "Len8"}: struct{}{}, + {"mipsle", "math/bits", "Mul64"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros16"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros32"}: struct{}{}, {"mipsle", "math/bits", "TrailingZeros64"}: struct{}{}, diff --git 
a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go index b9e226b207..989ae0a1db 100644 --- a/src/cmd/compile/internal/walk/expr.go +++ b/src/cmd/compile/internal/walk/expr.go @@ -704,27 +704,21 @@ func walkDivMod(n *ir.BinaryExpr, init *ir.Nodes) ir.Node { // runtime calls late in SSA processing. if types.RegSize < 8 && (et == types.TINT64 || et == types.TUINT64) { if n.Y.Op() == ir.OLITERAL { - // Leave div/mod by constant powers of 2 or small 16-bit constants. + // Leave div/mod by non-zero uint64 constants. // The SSA backend will handle those. + // (Zero constants should have been rejected already, but we check just in case.) switch et { case types.TINT64: - c := ir.Int64Val(n.Y) - if c < 0 { - c = -c - } - if c != 0 && c&(c-1) == 0 { + if ir.Int64Val(n.Y) != 0 { return n } case types.TUINT64: - c := ir.Uint64Val(n.Y) - if c < 1<<16 { - return n - } - if c != 0 && c&(c-1) == 0 { + if ir.Uint64Val(n.Y) != 0 { return n } } } + // Build call to uint64div, uint64mod, int64div, or int64mod. var fn string if et == types.TINT64 { fn = "int64" diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go index d0aad08849..348880f622 100644 --- a/src/cmd/compile/internal/x86/ssa.go +++ b/src/cmd/compile/internal/x86/ssa.go @@ -167,7 +167,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.Op386SBBL: opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg()) - case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry: + case ssa.Op386ADDLcarry, ssa.Op386ADCLcarry, ssa.Op386SUBLcarry: // output 0 is carry/borrow, output 1 is the low 32 bits. 
opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg()) diff --git a/test/codegen/divmod.go b/test/codegen/divmod.go index 3a78180817..98d0852398 100644 --- a/test/codegen/divmod.go +++ b/test/codegen/divmod.go @@ -279,7 +279,10 @@ func div3_uint32(i uint32) uint32 { } func div3_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1431655766" + // 386: "MULL" + // 386: "SHRL [$]1" + // 386 -".*CALL" // arm64: "MOVD [$]-6148914691236517205," // arm64: "UMULH" // arm64: "LSR [$]1," @@ -308,7 +311,10 @@ func div14_uint32(i uint32) uint32 { } func div14_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1840700270," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: -".*CALL" // arm64: "MOVD [$]-7905747460161236406," // arm64: "UMULH" // arm64: "LSR [$]2," @@ -343,7 +349,10 @@ func div7_uint32(i uint32) uint32 { } func div7_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1840700269," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: -".*CALL" // arm64: "MOVD [$]2635249153387078803," // arm64: "UMULH" // arm64: "SUB", @@ -353,7 +362,11 @@ func div7_uint64(i uint64) uint64 { } func div12345_uint64(i uint64) uint64 { - // 386 "CALL" + // 386: "MOVL [$]-1444876402," + // 386: "MOVL [$]835683390," + // 386: "MULL" + // 386: "SHRL [$]13," + // 386: "SHLL [$]19," // arm64: "MOVD [$]-6205696892516465602," // arm64: "UMULH" // arm64: "LSR [$]13," @@ -869,7 +882,12 @@ func ndivis6_int32(i int32) bool { } func divis6_int64(i int64) bool { - // 386 "CALL" + // 386: "IMUL3L [$]-1431655766," + // 386: "IMUL3L [$]-1431655765," + // 386: "ADCL [$]715827882," + // 386: "CMPL .*, [$]715827882" + // 386: "CMPL .*, [$]-1431655766" + // 386: "SETLS" // arm64: "MOVD [$]-6148914691236517205," // arm64: "MUL " // arm64: "MOVD [$]3074457345618258602," @@ -880,7 +898,12 @@ func divis6_int64(i int64) bool { } func ndivis6_int64(i int64) bool { - // 386 "CALL" + // 386: "IMUL3L [$]-1431655766," + // 386: "IMUL3L [$]-1431655765," + // 386: "ADCL [$]715827882," + // 386: 
"CMPL .*, [$]715827882" + // 386: "CMPL .*, [$]-1431655766" + // 386: "SETHI" // arm64: "MOVD [$]-6148914691236517205," // arm64: "MUL " // arm64: "MOVD [$]3074457345618258602," @@ -973,7 +996,14 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) { } func div_divis6_uint64(i uint64) (uint64, bool) { - // 386 "CALL" + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: "SHLL [$]30," + // 386: "SETEQ" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "UMULH" // arm64: "LSR [$]2," @@ -983,7 +1013,14 @@ func div_divis6_uint64(i uint64) (uint64, bool) { } func div_ndivis6_uint64(i uint64) (uint64, bool) { - // 386 "CALL" + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "MULL" + // 386: "SHRL [$]2," + // 386: "SHLL [$]30," + // 386: "SETNE" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "UMULH" // arm64: "LSR [$]2," @@ -1091,7 +1128,16 @@ func div_ndivis6_int32(i int32) (int32, bool) { } func div_divis6_int64(i int64) (int64, bool) { - // 386 "CALL" + // 386: "ANDL [$]-1431655766," + // 386: "ANDL [$]-1431655765," + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "SUBL" "SBBL" + // 386: "MULL" + // 386: "SETEQ" + // 386: -"SET(LS|HI)" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "SMULH" // arm64: "ADD" @@ -1103,7 +1149,16 @@ func div_divis6_int64(i int64) (int64, bool) { } func div_ndivis6_int64(i int64) (int64, bool) { - // 386 "CALL" + // 386: "ANDL [$]-1431655766," + // 386: "ANDL [$]-1431655765," + // 386: "MOVL [$]-1431655766," + // 386: "MOVL [$]-1431655765," + // 386: "SUBL" "SBBL" + // 386: "MULL" + // 386: "SETNE" + // 386: -"SET(LS|HI)" + // 386: -".*CALL" + // 386: -"RO[RL]" // arm64: "MOVD [$]-6148914691236517205," // arm64: "SMULH" // arm64: "ADD"