Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: implement bits.Mul64 on 32-bit systems
author Russ Cox <rsc@golang.org>
Mon, 27 Oct 2025 23:41:39 +0000 (19:41 -0400)
committer Gopher Robot <gobot@golang.org>
Thu, 30 Oct 2025 15:04:20 +0000 (08:04 -0700)
This CL implements Mul64uhilo, Hmul64, Hmul64u, and Avg64u
on 32-bit systems, with the effect that constant division of both
int64s and uint64s can now be emitted directly in all cases,
and also that bits.Mul64 can be intrinsified on 32-bit systems.

Previously, constant division of uint64s by values 0 ≤ c ≤ 0xFFFF was
implemented as uint32 divisions by c and some fixup. After expanding
those smaller constant divisions, the code for i/999 required:

(386) 7 mul, 10 add, 2 sub, 3 rotate, 3 shift (104 bytes)
(arm) 7 mul, 9 add, 3 sub, 2 shift (104 bytes)
(mips) 7 mul, 10 add, 5 sub, 6 shift, 3 sgtu (176 bytes)

For that much code, we might as well use a full 64x64->128 multiply
that can be used for all divisors, not just small ones.
Having done that, the same i/999 now generates:

(386) 4 mul, 9 add, 2 sub, 2 or, 6 shift (112 bytes)
(arm) 4 mul, 8 add, 2 sub, 2 or, 3 shift (92 bytes)
(mips) 4 mul, 11 add, 3 sub, 6 shift, 8 sgtu, 4 or (196 bytes)

The size increase on 386 is due to a few extra register spills.
The size increase on mips is due to add-with-carry being hard.

The new approach is more general, letting us delete the old special case
and guarantee that all int64 and uint64 divisions by constants are
generated directly on 32-bit systems.

This especially speeds up code making heavy use of bits.Mul64 with
a constant argument, which happens in strconv and various crypto
packages. A few examples are benchmarked below.

pkg: cmd/compile/internal/test

benchmark \ host                      local  linux-amd64       s7  linux-386  s7:GOARCH=386
                                    vs base      vs base  vs base    vs base        vs base
DivconstI64                               ~            ~        ~    -49.66%        -21.02%
ModconstI64                               ~            ~        ~    -13.45%        +14.52%
DivisiblePow2constI64                     ~            ~        ~     +0.97%         -1.32%
DivisibleconstI64                         ~            ~        ~    -20.01%        -48.28%
DivisibleWDivconstI64                     ~            ~   -1.76%    -38.59%        -42.74%
DivconstU64/3                             ~            ~        ~    -13.82%         -4.09%
DivconstU64/5                             ~            ~        ~    -14.10%         -3.54%
DivconstU64/37                       -2.07%       -4.45%        ~    -19.60%         -9.55%
DivconstU64/1234567                       ~            ~        ~    -61.55%        -56.93%
ModconstU64                               ~            ~        ~     -6.25%              ~
DivisibleconstU64                         ~            ~        ~     -2.78%         -7.82%
DivisibleWDivconstU64                     ~            ~        ~     +4.23%         +2.56%

pkg: math/bits

benchmark \ host         s7  linux-amd64  linux-386  s7:GOARCH=386
                    vs base      vs base    vs base        vs base
Add                       ~            ~          ~              ~
Add32                +1.59%            ~          ~              ~
Add64                     ~            ~          ~              ~
Add64multiple             ~            ~          ~              ~
Sub                       ~            ~          ~              ~
Sub32                     ~            ~          ~              ~
Sub64                     ~            ~     -9.20%              ~
Sub64multiple             ~            ~          ~              ~
Mul                       ~            ~          ~              ~
Mul32                     ~            ~          ~              ~
Mul64                     ~            ~    -41.58%        -53.21%
Div                       ~            ~          ~              ~
Div32                     ~            ~          ~              ~
Div64                     ~            ~          ~              ~

pkg: strconv

benchmark \ host                       s7  linux-amd64  linux-386  s7:GOARCH=386
                                  vs base      vs base    vs base        vs base
ParseInt/Pos/7bit                       ~            ~    -11.08%         -6.75%
ParseInt/Pos/26bit                      ~            ~    -13.65%        -11.02%
ParseInt/Pos/31bit                      ~            ~    -14.65%         -9.71%
ParseInt/Pos/56bit                 -1.80%            ~    -17.97%        -10.78%
ParseInt/Pos/63bit                      ~            ~    -13.85%         -9.63%
ParseInt/Neg/7bit                       ~            ~    -12.14%         -7.26%
ParseInt/Neg/26bit                      ~            ~    -14.18%         -9.81%
ParseInt/Neg/31bit                      ~            ~    -14.51%         -9.02%
ParseInt/Neg/56bit                      ~            ~    -15.79%         -9.79%
ParseInt/Neg/63bit                      ~            ~    -15.68%        -11.07%
AppendFloat/Decimal                     ~            ~     -7.25%        -12.26%
AppendFloat/Float                       ~            ~    -15.96%        -19.45%
AppendFloat/Exp                         ~            ~    -13.96%        -17.76%
AppendFloat/NegExp                      ~            ~    -14.89%        -20.27%
AppendFloat/LongExp                     ~            ~    -12.68%        -17.97%
AppendFloat/Big                         ~            ~    -11.10%        -16.64%
AppendFloat/BinaryExp                   ~            ~          ~              ~
AppendFloat/32Integer                   ~            ~    -10.05%        -10.91%
AppendFloat/32ExactFraction             ~            ~     -8.93%        -13.00%
AppendFloat/32Point                     ~            ~    -10.36%        -14.89%
AppendFloat/32Exp                       ~            ~     -9.88%        -13.54%
AppendFloat/32NegExp                    ~            ~    -10.16%        -14.26%
AppendFloat/32Shortest                  ~            ~    -11.39%        -14.96%
AppendFloat/32Fixed8Hard                ~            ~          ~         -2.31%
AppendFloat/32Fixed9Hard                ~            ~          ~         -7.01%
AppendFloat/64Fixed1                    ~            ~     -2.83%         -8.23%
AppendFloat/64Fixed2                    ~            ~          ~         -7.94%
AppendFloat/64Fixed3                    ~            ~     -4.07%         -7.22%
AppendFloat/64Fixed4                    ~            ~     -7.24%         -7.62%
AppendFloat/64Fixed12                   ~            ~     -6.57%         -4.82%
AppendFloat/64Fixed16                   ~            ~     -4.00%         -5.81%
AppendFloat/64Fixed12Hard          -2.22%            ~     -4.07%         -6.35%
AppendFloat/64Fixed17Hard          -2.12%            ~          ~         -3.79%
AppendFloat/64Fixed18Hard          -1.89%            ~     +2.48%              ~
AppendFloat/Slowpath64             -1.85%            ~    -14.49%        -18.21%
AppendFloat/SlowpathDenormal64          ~            ~    -13.08%        -19.41%

pkg: crypto/internal/fips140/nistec/fiat

benchmark \ host         s7  linux-amd64  linux-386  s7:GOARCH=386
                    vs base      vs base    vs base        vs base
Mul/P224                  ~            ~    -29.95%        -39.60%
Mul/P384                  ~            ~    -37.11%        -63.33%
Mul/P521                  ~            ~    -26.62%        -12.42%
Square/P224          +1.46%            ~    -40.62%        -49.18%
Square/P384               ~            ~    -45.51%        -69.68%
Square/P521         +90.37%            ~    -25.26%        -11.23%

(The +90% is a separate problem and not real; that much variation
can be seen on that system by running the same binary from two
different files.)

pkg: crypto/internal/fips140/edwards25519

benchmark \ host                    s7  linux-amd64  linux-386  s7:GOARCH=386
                               vs base      vs base    vs base        vs base
EncodingDecoding                     ~            ~    -34.67%        -35.75%
ScalarBaseMult                       ~            ~    -31.25%        -30.29%
ScalarMult                           ~            ~    -33.45%        -32.54%
VarTimeDoubleScalarBaseMult          ~            ~    -33.78%        -33.68%

Change-Id: Id3c91d42cd01def6731b755e99f8f40c6ad1bb65
Reviewed-on: https://go-review.googlesource.com/c/go/+/716061
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Russ Cox <rsc@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
23 files changed:
src/cmd/compile/internal/arm/ssa.go
src/cmd/compile/internal/ssa/_gen/386.rules
src/cmd/compile/internal/ssa/_gen/386Ops.go
src/cmd/compile/internal/ssa/_gen/ARM.rules
src/cmd/compile/internal/ssa/_gen/ARMOps.go
src/cmd/compile/internal/ssa/_gen/MIPS.rules
src/cmd/compile/internal/ssa/_gen/dec64.rules
src/cmd/compile/internal/ssa/_gen/divmod.rules
src/cmd/compile/internal/ssa/_gen/generic.rules
src/cmd/compile/internal/ssa/_gen/genericOps.go
src/cmd/compile/internal/ssa/_gen/rulegen.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewrite386.go
src/cmd/compile/internal/ssa/rewriteARM.go
src/cmd/compile/internal/ssa/rewriteMIPS.go
src/cmd/compile/internal/ssa/rewritedec64.go
src/cmd/compile/internal/ssa/rewritedivmod.go
src/cmd/compile/internal/ssa/rewritegeneric.go
src/cmd/compile/internal/ssagen/intrinsics.go
src/cmd/compile/internal/ssagen/intrinsics_test.go
src/cmd/compile/internal/walk/expr.go
src/cmd/compile/internal/x86/ssa.go
test/codegen/divmod.go

index a3bfb491b8b25f8df8e745eb5dce8e9ee72feadd..b31ffa474bc6d3066005f3c6556f58e4b0e423ba 100644 (file)
@@ -245,6 +245,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
        case ssa.OpARMADDS,
+               ssa.OpARMADCS,
                ssa.OpARMSUBS:
                r := v.Reg0()
                r1 := v.Args[0].Reg()
index 5f1150241929eb8d3958d8b7d8c8fd841636ab8a..cbe56f7579e6958f2f6d3c03e439dac5d05d2302 100644 (file)
@@ -7,6 +7,7 @@
 (Add(32|64)F ...) => (ADDS(S|D) ...)
 (Add32carry ...) => (ADDLcarry ...)
 (Add32withcarry ...) => (ADCL ...)
+(Add32carrywithcarry ...) => (ADCLcarry ...)
 
 (Sub(Ptr|32|16|8) ...) => (SUBL ...)
 (Sub(32|64)F ...) => (SUBS(S|D) ...)
index 60599a33abb587f22597717e4c240baec3e48ecd..09bfc4226ff8f5a941ba379419392e71591ab199 100644 (file)
@@ -90,22 +90,23 @@ func init() {
 
        // Common regInfo
        var (
-               gp01      = regInfo{inputs: nil, outputs: gponly}
-               gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly}
-               gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
-               gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
-               gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-               gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
-               gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
-               gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
-               gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-               gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
-               gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
-               gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
-               gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx}
-               gp21hmul  = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
-               gp11mod   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
-               gp21mul   = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
+               gp01           = regInfo{inputs: nil, outputs: gponly}
+               gp11           = regInfo{inputs: []regMask{gp}, outputs: gponly}
+               gp11sp         = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
+               gp11sb         = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
+               gp21           = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
+               gp11carry      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
+               gp21carry      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
+               gp1carry1      = regInfo{inputs: []regMask{gp}, outputs: gponly}
+               gp2carry1      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
+               gp2carry1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
+               gp21sp         = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
+               gp21sb         = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
+               gp21shift      = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
+               gp11div        = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx}
+               gp21hmul       = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
+               gp11mod        = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
+               gp21mul        = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
 
                gp2flags     = regInfo{inputs: []regMask{gpsp, gpsp}}
                gp1flags     = regInfo{inputs: []regMask{gpsp}}
@@ -181,10 +182,11 @@ func init() {
                {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
                {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint
 
-               {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true},                // arg0 + arg1, generates <carry,result> pair
-               {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true},                // arg0 + auxint, generates <carry,result> pair
-               {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags
-               {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags
+               {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true},                          // arg0 + arg1, generates <carry,result> pair
+               {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true},                          // arg0 + auxint, generates <carry,result> pair
+               {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true},           // arg0+arg1+carry(arg2), where arg2 is flags
+               {name: "ADCLcarry", argLength: 3, reg: gp2carry1carry, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags, generates <carry,result> pair
+               {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true},           // arg0+auxint+carry(arg1), where arg1 is flags
 
                {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
                {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
index 18b5d6bba6099e21c6b0c3a1448bb4067eeed477..b63ca23de14d6be7701af9e1d14a3af8c08d42cd 100644 (file)
@@ -6,6 +6,7 @@
 (Add(32|64)F ...) => (ADD(F|D) ...)
 (Add32carry ...) => (ADDS ...)
 (Add32withcarry ...) => (ADC ...)
+(Add32carrywithcarry ...) => (ADCS ...)
 
 (Sub(Ptr|32|16|8) ...) => (SUB ...)
 (Sub(32|64)F ...) => (SUB(F|D) ...)
index 01cd48835e2bc4cafa8cc1429d7f39b18a0f8bdf..59bb71b2e3c70a9403bc440d8338b6c7ecbddae3 100644 (file)
@@ -102,36 +102,37 @@ func init() {
        )
        // Common regInfo
        var (
-               gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
-               gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-               gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
-               gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-               gp1flags  = regInfo{inputs: []regMask{gpg}}
-               gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
-               gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-               gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
-               gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
-               gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
-               gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
-               gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
-               gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
-               gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-               gpstore   = regInfo{inputs: []regMask{gpspsbg, gpg}}
-               gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
-               gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
-               fp01      = regInfo{inputs: nil, outputs: []regMask{fp}}
-               fp11      = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-               fp1flags  = regInfo{inputs: []regMask{fp}}
-               fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp
-               gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")}
-               fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-               fp31      = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
-               fp2flags  = regInfo{inputs: []regMask{fp, fp}}
-               fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
-               fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
-               readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
+               gp01           = regInfo{inputs: nil, outputs: []regMask{gp}}
+               gp11           = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
+               gp11carry      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
+               gp11sp         = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
+               gp1flags       = regInfo{inputs: []regMask{gpg}}
+               gp1flags1      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
+               gp21           = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
+               gp21carry      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
+               gp2flags       = regInfo{inputs: []regMask{gpg, gpg}}
+               gp2flags1      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
+               gp2flags1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
+               gp22           = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
+               gp31           = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
+               gp31carry      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
+               gp3flags       = regInfo{inputs: []regMask{gp, gp, gp}}
+               gp3flags1      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
+               gpload         = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
+               gpstore        = regInfo{inputs: []regMask{gpspsbg, gpg}}
+               gp2load        = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
+               gp2store       = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
+               fp01           = regInfo{inputs: nil, outputs: []regMask{fp}}
+               fp11           = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
+               fp1flags       = regInfo{inputs: []regMask{fp}}
+               fpgp           = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp
+               gpfp           = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")}
+               fp21           = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
+               fp31           = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
+               fp2flags       = regInfo{inputs: []regMask{fp, fp}}
+               fpload         = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
+               fpstore        = regInfo{inputs: []regMask{gpspsbg, fp}}
+               readflags      = regInfo{inputs: nil, outputs: []regMask{gp}}
        )
        ops := []opData{
                // binary ops
@@ -161,16 +162,17 @@ func init() {
                        call:         false, // TODO(mdempsky): Should this be true?
                },
 
-               {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag
-               {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag
-               {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true},  // arg0 + arg1 + carry, arg2=flags
-               {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"},  // arg0 + auxInt + carry, arg1=flags
-               {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"},                    // arg0 - arg1, set carry flag
-               {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag
-               {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag
-               {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                     // arg0 - arg1 - carry, arg2=flags
-               {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"},  // arg0 - auxInt - carry, arg1=flags
-               {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"},  // auxInt - arg0 - carry, arg1=flags
+               {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true},      // arg0 + arg1, set carry flag
+               {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"},      // arg0 + auxInt, set carry flag
+               {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true},       // arg0 + arg1 + carry, arg2=flags
+               {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"},       // arg0 + auxInt + carry, arg1=flags
+               {name: "ADCS", argLength: 3, reg: gp2flags1carry, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, sets carry
+               {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"},                         // arg0 - arg1, set carry flag
+               {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"},      // arg0 - auxInt, set carry flag
+               {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"},      // auxInt - arg0, set carry flag
+               {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                          // arg0 - arg1 - carry, arg2=flags
+               {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"},       // arg0 - auxInt - carry, arg1=flags
+               {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"},       // auxInt - arg0 - carry, arg1=flags
 
                {name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1
                {name: "MULA", argLength: 3, reg: gp31, asm: "MULA"},                      // arg0 * arg1 + arg2
index 80bf9017f524281441001418c874d47ce1f77859..fe1e00a4e4c6f3afa1d6864ec75959756ddf0e24 100644 (file)
@@ -9,6 +9,12 @@
 (Select1 (Add32carry <t> x y)) => (SGTU <typ.Bool> x (ADD <t.FieldType(0)> x y))
 (Add32withcarry <t> x y c) => (ADD c (ADD <t> x y))
 
+(Select0 (Add32carrywithcarry <t> x y c)) => (ADD <t.FieldType(0)> c (ADD <t.FieldType(0)> x y))
+(Select1 (Add32carrywithcarry <t> x y c)) =>
+       (OR <typ.Bool>
+               (SGTU <typ.Bool> x xy:(ADD <t.FieldType(0)> x y))
+               (SGTU <typ.Bool> xy (ADD <t.FieldType(0)> c xy)))
+
 (Sub(Ptr|32|16|8) ...) => (SUB ...)
 (Sub(32|64)F ...) => (SUB(F|D) ...)
 
index 589c2fcfc1468d9da95c0e846acd7f56c3c41ed7..483818906e6ff8fa71fd488e2671c9eb3a3d3b35 100644 (file)
@@ -6,8 +6,12 @@
 // architectures. These rules work together with the decomposeBuiltin
 // pass which handles phis of these typ.
 
+(Last ___) => v.Args[len(v.Args)-1]
+
 (Int64Hi (Int64Make hi _)) => hi
 (Int64Lo (Int64Make _ lo)) => lo
+(Select0 (MakeTuple x y)) => x
+(Select1 (MakeTuple x y)) => y
 
 (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
        (Int64Make
     (Arg <typ.UInt32> {n} [off])
     (Arg <typ.UInt32> {n} [off+4]))
 
-(Add64 x y) =>
-       (Int64Make
-               (Add32withcarry <typ.Int32>
-                       (Int64Hi x)
-                       (Int64Hi y)
-                       (Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-               (Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y))))
+(Add64 <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               add: (Add32carry x0 y0)
+               (Int64Make
+                       (Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add))
+                       (Select0 <typ.UInt32> add)))
+
+(Sub64 <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               sub: (Sub32carry x0 y0)
+               (Int64Make
+                       (Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub))
+                       (Select0 <typ.UInt32> sub)))
+
+(Mul64 <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               x0y0: (Mul32uhilo x0 y0)
+               x0y0Hi: (Select0 <typ.UInt32> x0y0)
+               x0y0Lo: (Select1 <typ.UInt32> x0y0)
+               (Int64Make
+                       (Add32 <typ.UInt32> x0y0Hi
+                               (Add32 <typ.UInt32>
+                                       (Mul32 <typ.UInt32> x0 y1)
+                                       (Mul32 <typ.UInt32> x1 y0)))
+                       x0y0Lo))
+
+(Mul64uhilo <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               x0y0: (Mul32uhilo x0 y0)
+               x0y1: (Mul32uhilo x0 y1)
+               x1y0: (Mul32uhilo x1 y0)
+               x1y1: (Mul32uhilo x1 y1)
+               x0y0Hi: (Select0 <typ.UInt32> x0y0)
+               x0y0Lo: (Select1 <typ.UInt32> x0y0)
+               x0y1Hi: (Select0 <typ.UInt32> x0y1)
+               x0y1Lo: (Select1 <typ.UInt32> x0y1)
+               x1y0Hi: (Select0 <typ.UInt32> x1y0)
+               x1y0Lo: (Select1 <typ.UInt32> x1y0)
+               x1y1Hi: (Select0 <typ.UInt32> x1y1)
+               x1y1Lo: (Select1 <typ.UInt32> x1y1)
+               w1a: (Add32carry x0y0Hi x0y1Lo)
+               w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a))
+               w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a))
+               w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a))
+               w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b))
+               w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b))
+               (MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)>
+                       (Int64Make w3b (Select0 <typ.UInt32> w2b))
+                       (Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
+
+(Hmul64u x y) => (Select0 (Mul64uhilo x y))
+
+// Hacker's Delight p. 175: signed hmul = unsigned hmul - (x<0)&y - (y<0)&x.
+(Hmul64 x y) =>
+       (Last
+               p: (Hmul64u <typ.UInt64> x y)
+               xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs)
+               ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys)
+               (Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
+
+// (x+y)/2 => (x-y)/2 + y
+(Avg64u <t> x y) => (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
 
-(Sub64 x y) =>
-       (Int64Make
-               (Sub32withcarry <typ.Int32>
-                       (Int64Hi x)
-                       (Int64Hi y)
-                       (Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-               (Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y))))
-
-(Mul64 x y) =>
-       (Int64Make
-               (Add32 <typ.UInt32>
-                       (Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y))
-                       (Add32 <typ.UInt32>
-                               (Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y))
-                               (Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y)))))
-               (Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))
 
 (And64 x y) =>
        (Int64Make
index c7c9e132095cca9e35e1766baa9daebc6e1b8dfe..21e0a194068df8f5a76f7accb087a2566dc7f507 100644 (file)
       (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)]))
       (Const64 <typ.UInt64> [smagic32(c).s - 1]))
     (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
-(Div64 <t> x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul =>
+(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul =>
   (Sub64 <t>
     (Rsh64x64 <t>
       (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)]))
       (Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])))
       (Const64 <typ.UInt64> [smagic32(c).s]))
     (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
-(Div64 <t> x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul =>
+(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul =>
   (Sub64 <t>
     (Rsh64x64 <t>
       (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])))
   (Rsh32Ux64 <t>
     (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))
     (Const64 <typ.UInt64> [smagic32(c).s]))
-(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul =>
   (Rsh64Ux64 <t>
     (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))
     (Const64 <typ.UInt64> [smagic64(c).s]))
   (Rsh32Ux64 <t>
     (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)]))
     (Const64 <typ.UInt64> [umagic32(c).s - 1]))
-(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul =>
   (Rsh64Ux64 <t>
     (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)]))
     (Const64 <typ.UInt64> [umagic64(c).s - 1]))
       (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1]))
       (Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)]))
     (Const64 <typ.UInt64> [umagic32(c).s - 2]))
-(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul =>
   (Rsh64Ux64 <t>
     (Hmul64u <typ.UInt64>
       (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1]))
   (Rsh32Ux64 <t>
     (Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)])))
     (Const64 <typ.UInt64> [umagic32(c).s - 1]))
-(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul =>
   (Rsh64Ux64 <t>
     (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)])))
     (Const64 <typ.UInt64> [umagic64(c).s - 1]))
-
-// Case 9. For unsigned 64-bit divides on 32-bit machines,
-// if the constant fits in 16 bits (so that the last term
-// fits in 32 bits), convert to three 32-bit divides by a constant.
-//
-// If 1<<32 = Q * c + R
-// and    x = hi << 32 + lo
-//
-// Then x = (hi/c*c + hi%c) << 32 + lo
-//        = hi/c*c<<32 + hi%c<<32 + lo
-//        = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c
-//        = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c)
-// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c
-(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul =>
-  (Add64
-    (Add64 <typ.UInt64>
-      (Add64 <typ.UInt64>
-        (Lsh64x64 <typ.UInt64>
-          (ZeroExt32to64
-            (Div32u <typ.UInt32>
-              (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
-              (Const32 <typ.UInt32> [int32(c)])))
-          (Const64 <typ.UInt64> [32]))
-        (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))))
-      (Mul64 <typ.UInt64>
-        (ZeroExt32to64 <typ.UInt64>
-          (Mod32u <typ.UInt32>
-            (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
-            (Const32 <typ.UInt32> [int32(c)])))
-        (Const64 <typ.UInt64> [int64((1<<32)/c)])))
-      (ZeroExt32to64
-        (Div32u <typ.UInt32>
-          (Add32 <typ.UInt32>
-            (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))
-            (Mul32 <typ.UInt32>
-              (Mod32u <typ.UInt32>
-                (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
-                (Const32 <typ.UInt32> [int32(c)]))
-              (Const32 <typ.UInt32> [int32((1<<32)%c)])))
-          (Const32 <typ.UInt32> [int32(c)]))))
-
-// Repeated from generic.rules, for expanding the expression above
-// (which can then be further expanded to handle the nested Div32u).
-(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
-  => (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
index 3f026448326005c81f11ad38696299285c86edb2..7e3aba1e5ee8d5282f018b3e7ebb3a667280a55e 100644 (file)
   => (Sub32 x (Mul32 <t> (Div32  <t> x (Const32 <t> [c])) (Const32 <t> [c])))
 (Mod64  <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
   => (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
-(Mod8u  <t> x (Const8  [c])) && x.Op != OpConst8  && c > 0 && umagicOK8( c)
+(Mod8u  <t> x (Const8  [c])) && x.Op != OpConst8  && c != 0
   => (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
-(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c)
+(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c != 0
   => (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
-(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
+(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c != 0
   => (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
-(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c)
+(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c != 0
   => (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
 
 // Set up for mod->mul+rot optimization in genericlateopt.rules.
index 1f6ad4e16d98f8803ed54473ac360db487bb2b48..09fb4bf03f943cac38e04119c156d404e2740cbc 100644 (file)
@@ -16,6 +16,9 @@ package main
 // are signed or unsigned.
 
 var genericOps = []opData{
+       // Pseudo-op.
+       {name: "Last", argLength: -1}, // return last element of tuple; for "let" bindings
+
        // 2-input arithmetic
        // Types must be consistent with Go typing. Add, for example, must take two values
        // of the same type and produces that same type.
@@ -557,8 +560,9 @@ var genericOps = []opData{
        {name: "Int64Hi", argLength: 1, typ: "UInt32"},   // high 32-bit of arg0
        {name: "Int64Lo", argLength: 1, typ: "UInt32"},   // low 32-bit of arg0
 
-       {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
-       {name: "Add32withcarry", argLength: 3, commutative: true},                    // arg0 + arg1 + arg2, arg2=carry (0 or 1)
+       {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"},          // arg0 + arg1, returns (value, carry)
+       {name: "Add32withcarry", argLength: 3, commutative: true},                             // arg0 + arg1 + arg2, arg2=carry (0 or 1)
+       {name: "Add32carrywithcarry", argLength: 3, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1 + arg2, arg2=carry, returns (value, carry)
 
        {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
        {name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
index f818b46511d04a08f0b4f46dad689e990935e549..e3a10707fed81d8521b9d510e09ff69db4fcb0c9 100644 (file)
@@ -1271,8 +1271,10 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s
        case 0:
        case 1:
                rr.add(stmtf("%s.AddArg(%s)", v, all.String()))
-       default:
+       case 2, 3, 4, 5, 6:
                rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String()))
+       default:
+               rr.add(stmtf("%s.AddArgs(%s)", v, all.String()))
        }
 
        if cse != nil {
@@ -1313,6 +1315,12 @@ outer:
                                d++
                        case d > 0 && s[i] == close:
                                d--
+                       case s[i] == ':':
+                               // ignore spaces after colons
+                               nonsp = true
+                               for i+1 < len(s) && (s[i+1] == ' ' || s[i+1] == '\t') {
+                                       i++
+                               }
                        default:
                                nonsp = true
                        }
@@ -1347,7 +1355,7 @@ func extract(val string) (op, typ, auxint, aux string, args []string) {
        val = val[1 : len(val)-1] // remove ()
 
        // Split val up into regions.
-       // Split by spaces/tabs, except those contained in (), {}, [], or <>.
+       // Split by spaces/tabs, except those contained in (), {}, [], or <> or after colon.
        s := split(val)
 
        // Extract restrictions and args.
@@ -1471,7 +1479,7 @@ func splitNameExpr(arg string) (name, expr string) {
                // colon is inside the parens, such as in "(Foo x:(Bar))".
                return "", arg
        }
-       return arg[:colon], arg[colon+1:]
+       return arg[:colon], strings.TrimSpace(arg[colon+1:])
 }
 
 func getBlockInfo(op string, arch arch) (name string, data blockData) {
index 16a983a56878d033f4f0721638e924d2f6dded62..264f4b3bf378f1301e86bf19bd69baf3182f68d4 100644 (file)
@@ -386,6 +386,7 @@ const (
        Op386ADDLcarry
        Op386ADDLconstcarry
        Op386ADCL
+       Op386ADCLcarry
        Op386ADCLconst
        Op386SUBL
        Op386SUBLconst
@@ -1182,6 +1183,7 @@ const (
        OpARMADDSconst
        OpARMADC
        OpARMADCconst
+       OpARMADCS
        OpARMSUBS
        OpARMSUBSconst
        OpARMRSBSconst
@@ -3010,6 +3012,7 @@ const (
        OpWasmI64Rotl
        OpWasmI64Popcnt
 
+       OpLast
        OpAdd8
        OpAdd16
        OpAdd32
@@ -3336,6 +3339,7 @@ const (
        OpInt64Lo
        OpAdd32carry
        OpAdd32withcarry
+       OpAdd32carrywithcarry
        OpSub32carry
        OpSub32withcarry
        OpAdd64carry
@@ -3968,6 +3972,24 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "ADCLcarry",
+               argLen:       3,
+               commutative:  true,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.AADCL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                               {1, 239}, // AX CX DX BX BP SI DI
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
        {
                name:         "ADCLconst",
                auxType:      auxInt32,
@@ -15792,6 +15814,22 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:        "ADCS",
+               argLen:      3,
+               commutative: true,
+               asm:         arm.AADC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                               {1, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
        {
                name:   "SUBS",
                argLen: 2,
@@ -40672,6 +40710,11 @@ var opcodeTable = [...]opInfo{
                },
        },
 
+       {
+               name:    "Last",
+               argLen:  -1,
+               generic: true,
+       },
        {
                name:        "Add8",
                argLen:      2,
@@ -42480,6 +42523,12 @@ var opcodeTable = [...]opInfo{
                commutative: true,
                generic:     true,
        },
+       {
+               name:        "Add32carrywithcarry",
+               argLen:      3,
+               commutative: true,
+               generic:     true,
+       },
        {
                name:    "Sub32carry",
                argLen:  2,
index 0495438710659e78b8b472f9c2be9ec140a25067..be88dd3cddadf721e9ce8e69239ef1a248adca5d 100644 (file)
@@ -257,6 +257,9 @@ func rewriteValue386(v *Value) bool {
        case OpAdd32carry:
                v.Op = Op386ADDLcarry
                return true
+       case OpAdd32carrywithcarry:
+               v.Op = Op386ADCLcarry
+               return true
        case OpAdd32withcarry:
                v.Op = Op386ADCL
                return true
index 44380cf8f57d85ba4de8c389cd76b45883fc2fd2..2a90e7b433bd6e0336192701fc622319cd9189af 100644 (file)
@@ -446,6 +446,9 @@ func rewriteValueARM(v *Value) bool {
        case OpAdd32carry:
                v.Op = OpARMADDS
                return true
+       case OpAdd32carrywithcarry:
+               v.Op = OpARMADCS
+               return true
        case OpAdd32withcarry:
                v.Op = OpARMADC
                return true
index fda02e64d19d7085beee9e88783c5ed8ea5941fa..ff696337ef82990bf5d553a07470c33929407e23 100644 (file)
@@ -6562,6 +6562,23 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                v.AddArg2(x, y)
                return true
        }
+       // match: (Select0 (Add32carrywithcarry <t> x y c))
+       // result: (ADD <t.FieldType(0)> c (ADD <t.FieldType(0)> x y))
+       for {
+               if v_0.Op != OpAdd32carrywithcarry {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpMIPSADD)
+               v.Type = t.FieldType(0)
+               v0 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
+               v0.AddArg2(x, y)
+               v.AddArg2(c, v0)
+               return true
+       }
        // match: (Select0 (Sub32carry <t> x y))
        // result: (SUB <t.FieldType(0)> x y)
        for {
@@ -6759,6 +6776,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                v.AddArg2(x, v0)
                return true
        }
+       // match: (Select1 (Add32carrywithcarry <t> x y c))
+       // result: (OR <typ.Bool> (SGTU <typ.Bool> x xy:(ADD <t.FieldType(0)> x y)) (SGTU <typ.Bool> xy (ADD <t.FieldType(0)> c xy)))
+       for {
+               if v_0.Op != OpAdd32carrywithcarry {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpMIPSOR)
+               v.Type = typ.Bool
+               v0 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool)
+               xy := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
+               xy.AddArg2(x, y)
+               v0.AddArg2(x, xy)
+               v2 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool)
+               v3 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
+               v3.AddArg2(c, xy)
+               v2.AddArg2(xy, v3)
+               v.AddArg2(v0, v2)
+               return true
+       }
        // match: (Select1 (Sub32carry <t> x y))
        // result: (SGTU <typ.Bool> (SUB <t.FieldType(0)> x y) x)
        for {
index b4da78fd5229bf04d3f644679593c106af6c8f7f..a0388551b5301581ecfaf3154d2fa5f7019a8840 100644 (file)
@@ -12,6 +12,8 @@ func rewriteValuedec64(v *Value) bool {
                return rewriteValuedec64_OpAnd64(v)
        case OpArg:
                return rewriteValuedec64_OpArg(v)
+       case OpAvg64u:
+               return rewriteValuedec64_OpAvg64u(v)
        case OpBitLen64:
                return rewriteValuedec64_OpBitLen64(v)
        case OpBswap64:
@@ -27,10 +29,16 @@ func rewriteValuedec64(v *Value) bool {
                return true
        case OpEq64:
                return rewriteValuedec64_OpEq64(v)
+       case OpHmul64:
+               return rewriteValuedec64_OpHmul64(v)
+       case OpHmul64u:
+               return rewriteValuedec64_OpHmul64u(v)
        case OpInt64Hi:
                return rewriteValuedec64_OpInt64Hi(v)
        case OpInt64Lo:
                return rewriteValuedec64_OpInt64Lo(v)
+       case OpLast:
+               return rewriteValuedec64_OpLast(v)
        case OpLeq64:
                return rewriteValuedec64_OpLeq64(v)
        case OpLeq64U:
@@ -57,6 +65,8 @@ func rewriteValuedec64(v *Value) bool {
                return rewriteValuedec64_OpLsh8x64(v)
        case OpMul64:
                return rewriteValuedec64_OpMul64(v)
+       case OpMul64uhilo:
+               return rewriteValuedec64_OpMul64uhilo(v)
        case OpNeg64:
                return rewriteValuedec64_OpNeg64(v)
        case OpNeq64:
@@ -101,6 +111,10 @@ func rewriteValuedec64(v *Value) bool {
                return rewriteValuedec64_OpRsh8Ux64(v)
        case OpRsh8x64:
                return rewriteValuedec64_OpRsh8x64(v)
+       case OpSelect0:
+               return rewriteValuedec64_OpSelect0(v)
+       case OpSelect1:
+               return rewriteValuedec64_OpSelect1(v)
        case OpSignExt16to64:
                return rewriteValuedec64_OpSignExt16to64(v)
        case OpSignExt32to64:
@@ -133,29 +147,33 @@ func rewriteValuedec64_OpAdd64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Add64 x y)
-       // result: (Int64Make (Add32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y))))
+       // match: (Add64 <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) add: (Add32carry x0 y0) (Int64Make (Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add)) (Select0 <typ.UInt32> add)))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpInt64Make)
-               v0 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.Int32)
-               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v5.AddArg(x)
-               v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v6.AddArg(y)
-               v4.AddArg2(v5, v6)
-               v3.AddArg(v4)
-               v0.AddArg3(v1, v2, v3)
-               v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
-               v7.AddArg(v4)
-               v.AddArg2(v0, v7)
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               add := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               add.AddArg2(x0, y0)
+               v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v6 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v7.AddArg(add)
+               v6.AddArg3(x1, y1, v7)
+               v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v8.AddArg(add)
+               v5.AddArg2(v6, v8)
+               v.AddArg6(x0, x1, y0, y1, add, v5)
                return true
        }
 }
@@ -268,6 +286,28 @@ func rewriteValuedec64_OpArg(v *Value) bool {
        }
        return false
 }
+func rewriteValuedec64_OpAvg64u(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Avg64u <t> x y)
+       // result: (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux32, t)
+               v1 := b.NewValue0(v.Pos, OpSub64, t)
+               v1.AddArg2(x, y)
+               v2 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v2.AuxInt = int32ToAuxInt(1)
+               v0.AddArg2(v1, v2)
+               v.AddArg2(v0, y)
+               return true
+       }
+}
 func rewriteValuedec64_OpBitLen64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
@@ -430,6 +470,62 @@ func rewriteValuedec64_OpEq64(v *Value) bool {
                return true
        }
 }
+func rewriteValuedec64_OpHmul64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Hmul64 x y)
+       // result: (Last p: (Hmul64u <typ.UInt64> x y) xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs) ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys) (Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpLast)
+               p := b.NewValue0(v.Pos, OpHmul64u, typ.UInt64)
+               p.AddArg2(x, y)
+               xSign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               xs := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
+               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v3.AddArg(x)
+               v4 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v4.AuxInt = int32ToAuxInt(31)
+               xs.AddArg2(v3, v4)
+               xSign.AddArg2(xs, xs)
+               ySign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               ys := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v7.AddArg(y)
+               ys.AddArg2(v7, v4)
+               ySign.AddArg2(ys, ys)
+               v8 := b.NewValue0(v.Pos, OpSub64, typ.Int64)
+               v9 := b.NewValue0(v.Pos, OpSub64, typ.Int64)
+               v10 := b.NewValue0(v.Pos, OpAnd64, typ.Int64)
+               v10.AddArg2(xSign, y)
+               v9.AddArg2(p, v10)
+               v11 := b.NewValue0(v.Pos, OpAnd64, typ.Int64)
+               v11.AddArg2(ySign, x)
+               v8.AddArg2(v9, v11)
+               v.AddArg4(p, xSign, ySign, v8)
+               return true
+       }
+}
+func rewriteValuedec64_OpHmul64u(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Hmul64u x y)
+       // result: (Select0 (Mul64uhilo x y))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpMul64uhilo, types.NewTuple(typ.UInt64, typ.UInt64))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+}
 func rewriteValuedec64_OpInt64Hi(v *Value) bool {
        v_0 := v.Args[0]
        // match: (Int64Hi (Int64Make hi _))
@@ -458,6 +554,14 @@ func rewriteValuedec64_OpInt64Lo(v *Value) bool {
        }
        return false
 }
+func rewriteValuedec64_OpLast(v *Value) bool {
+       // match: (Last ___)
+       // result: v.Args[len(v.Args)-1]
+       for {
+               v.copyOf(v.Args[len(v.Args)-1])
+               return true
+       }
+}
 func rewriteValuedec64_OpLeq64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -1114,35 +1218,124 @@ func rewriteValuedec64_OpMul64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Mul64 x y)
-       // result: (Int64Make (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y)) (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y)) (Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))) (Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))
+       // match: (Mul64 <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y0Hi: (Select0 <typ.UInt32> x0y0) x0y0Lo: (Select1 <typ.UInt32> x0y0) (Int64Make (Add32 <typ.UInt32> x0y0Hi (Add32 <typ.UInt32> (Mul32 <typ.UInt32> x0 y1) (Mul32 <typ.UInt32> x1 y0))) x0y0Lo))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpInt64Make)
-               v0 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
-               v1 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
-               v2 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v2.AddArg(x)
-               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v3.AddArg(y)
-               v1.AddArg2(v2, v3)
-               v4 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
-               v6 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v6.AddArg(x)
-               v7 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v7.AddArg(y)
-               v5.AddArg2(v6, v7)
-               v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
-               v9 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
-               v9.AddArg2(v2, v7)
-               v8.AddArg(v9)
-               v4.AddArg2(v5, v8)
-               v0.AddArg2(v1, v4)
-               v10 := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
-               v10.AddArg(v9)
-               v.AddArg2(v0, v10)
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x0y0.AddArg2(x0, y0)
+               x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x0y0Hi.AddArg(x0y0)
+               x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x0y0Lo.AddArg(x0y0)
+               v7 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v8 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
+               v9 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
+               v10.AddArg2(x0, y1)
+               v11 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
+               v11.AddArg2(x1, y0)
+               v9.AddArg2(v10, v11)
+               v8.AddArg2(x0y0Hi, v9)
+               v7.AddArg2(v8, x0y0Lo)
+               v.AddArgs(x0, x1, y0, y1, x0y0, x0y0Hi, x0y0Lo, v7)
+               return true
+       }
+}
+func rewriteValuedec64_OpMul64uhilo(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Mul64uhilo <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y1: (Mul32uhilo x0 y1) x1y0: (Mul32uhilo x1 y0) x1y1: (Mul32uhilo x1 y1) x0y0Hi: (Select0 <typ.UInt32> x0y0) x0y0Lo: (Select1 <typ.UInt32> x0y0) x0y1Hi: (Select0 <typ.UInt32> x0y1) x0y1Lo: (Select1 <typ.UInt32> x0y1) x1y0Hi: (Select0 <typ.UInt32> x1y0) x1y0Lo: (Select1 <typ.UInt32> x1y0) x1y1Hi: (Select0 <typ.UInt32> x1y1) x1y1Lo: (Select1 <typ.UInt32> x1y1) w1a: (Add32carry x0y0Hi x0y1Lo) w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a)) w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a)) w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a)) w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b)) w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b)) (MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)> (Int64Make w3b (Select0 <typ.UInt32> w2b)) (Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x0y0.AddArg2(x0, y0)
+               x0y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x0y1.AddArg2(x0, y1)
+               x1y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x1y0.AddArg2(x1, y0)
+               x1y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x1y1.AddArg2(x1, y1)
+               x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x0y0Hi.AddArg(x0y0)
+               x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x0y0Lo.AddArg(x0y0)
+               x0y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x0y1Hi.AddArg(x0y1)
+               x0y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x0y1Lo.AddArg(x0y1)
+               x1y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x1y0Hi.AddArg(x1y0)
+               x1y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x1y0Lo.AddArg(x1y0)
+               x1y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x1y1Hi.AddArg(x1y1)
+               x1y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x1y1Lo.AddArg(x1y1)
+               w1a := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               w1a.AddArg2(x0y0Hi, x0y1Lo)
+               w2a := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v18 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v18.AddArg(w1a)
+               w2a.AddArg3(x0y1Hi, x1y0Hi, v18)
+               w3a := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
+               v20 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v20.AuxInt = int32ToAuxInt(0)
+               v21 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v21.AddArg(w2a)
+               w3a.AddArg3(x1y1Hi, v20, v21)
+               w1b := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v23 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v23.AddArg(w1a)
+               w1b.AddArg2(x1y0Lo, v23)
+               w2b := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v25 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v25.AddArg(w2a)
+               v26 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v26.AddArg(w1b)
+               w2b.AddArg3(x1y1Lo, v25, v26)
+               w3b := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
+               v28 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v28.AddArg(w2b)
+               w3b.AddArg3(w3a, v20, v28)
+               v29 := b.NewValue0(v.Pos, OpMakeTuple, types.NewTuple(typ.UInt64, typ.UInt64))
+               v30 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v31 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v31.AddArg(w2b)
+               v30.AddArg2(w3b, v31)
+               v32 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v33 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v33.AddArg(w1b)
+               v32.AddArg2(v33, x0y0Lo)
+               v29.AddArg2(v30, v32)
+               v.AddArgs(x0, x1, y0, y1, x0y0, x0y1, x1y0, x1y1, x0y0Hi, x0y0Lo, x0y1Hi, x0y1Lo, x1y0Hi, x1y0Lo, x1y1Hi, x1y1Lo, w1a, w2a, w3a, w1b, w2b, w3b, v29)
                return true
        }
 }
@@ -2705,6 +2898,34 @@ func rewriteValuedec64_OpRsh8x64(v *Value) bool {
                return true
        }
 }
+func rewriteValuedec64_OpSelect0(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Select0 (MakeTuple x y))
+       // result: x
+       for {
+               if v_0.Op != OpMakeTuple {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
+func rewriteValuedec64_OpSelect1(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Select1 (MakeTuple x y))
+       // result: y
+       for {
+               if v_0.Op != OpMakeTuple {
+                       break
+               }
+               y := v_0.Args[1]
+               v.copyOf(y)
+               return true
+       }
+       return false
+}
 func rewriteValuedec64_OpSignExt16to64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
@@ -2815,29 +3036,33 @@ func rewriteValuedec64_OpSub64(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
        typ := &b.Func.Config.Types
-       // match: (Sub64 x y)
-       // result: (Int64Make (Sub32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y))))
+       // match: (Sub64 <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) sub: (Sub32carry x0 y0) (Int64Make (Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub)) (Select0 <typ.UInt32> sub)))
        for {
+               t := v.Type
                x := v_0
                y := v_1
-               v.reset(OpInt64Make)
-               v0 := b.NewValue0(v.Pos, OpSub32withcarry, typ.Int32)
-               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v5.AddArg(x)
-               v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v6.AddArg(y)
-               v4.AddArg2(v5, v6)
-               v3.AddArg(v4)
-               v0.AddArg3(v1, v2, v3)
-               v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
-               v7.AddArg(v4)
-               v.AddArg2(v0, v7)
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               sub := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               sub.AddArg2(x0, y0)
+               v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v6 := b.NewValue0(v.Pos, OpSub32withcarry, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v7.AddArg(sub)
+               v6.AddArg3(x1, y1, v7)
+               v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v8.AddArg(sub)
+               v5.AddArg2(v6, v8)
+               v.AddArg6(x0, x1, y0, y1, sub, v5)
                return true
        }
 }
index fc37d84999f477c6702c3991a198ffbfb2ead8d8..02978075a8aede5ffce009f2042482fc9ec41fbc 100644 (file)
@@ -20,8 +20,6 @@ func rewriteValuedivmod(v *Value) bool {
                return rewriteValuedivmod_OpDiv8(v)
        case OpDiv8u:
                return rewriteValuedivmod_OpDiv8u(v)
-       case OpMod32u:
-               return rewriteValuedivmod_OpMod32u(v)
        }
        return false
 }
@@ -646,7 +644,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                return true
        }
        // match: (Div64 <t> x (Const64 [c]))
-       // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul
+       // cond: smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul
        // result: (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)])) (Const64 <typ.UInt64> [smagic64(c).s - 1])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
        for {
                t := v.Type
@@ -655,7 +653,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul) {
+               if !(smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul) {
                        break
                }
                v.reset(OpSub64)
@@ -676,7 +674,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                return true
        }
        // match: (Div64 <t> x (Const64 [c]))
-       // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul
+       // cond: smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul
        // result: (Sub64 <t> (Rsh64x64 <t> (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))) (Const64 <typ.UInt64> [smagic64(c).s])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
        for {
                t := v.Type
@@ -685,7 +683,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul) {
+               if !(smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul) {
                        break
                }
                v.reset(OpSub64)
@@ -716,7 +714,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
        config := b.Func.Config
        typ := &b.Func.Config.Types
        // match: (Div64u <t> x (Const64 [c]))
-       // cond: t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul
+       // cond: t.IsSigned() && smagicOK64(c) && config.useHmul
        // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])) (Const64 <typ.UInt64> [smagic64(c).s]))
        for {
                t := v.Type
@@ -725,7 +723,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul) {
+               if !(t.IsSigned() && smagicOK64(c) && config.useHmul) {
                        break
                }
                v.reset(OpRsh64Ux64)
@@ -740,7 +738,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                return true
        }
        // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul
+       // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul
        // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
        for {
                t := v.Type
@@ -749,7 +747,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul) {
+               if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul) {
                        break
                }
                v.reset(OpRsh64Ux64)
@@ -764,7 +762,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                return true
        }
        // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul
+       // cond: umagicOK64(c) && c&1 == 0 && config.useHmul
        // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])) (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 2]))
        for {
                t := v.Type
@@ -773,7 +771,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul) {
+               if !(umagicOK64(c) && c&1 == 0 && config.useHmul) {
                        break
                }
                v.reset(OpRsh64Ux64)
@@ -792,7 +790,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                return true
        }
        // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul
+       // cond: umagicOK64(c) && config.useAvg && config.useHmul
        // result: (Rsh64Ux64 <t> (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)]))) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
        for {
                t := v.Type
@@ -801,7 +799,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul) {
+               if !(umagicOK64(c) && config.useAvg && config.useHmul) {
                        break
                }
                v.reset(OpRsh64Ux64)
@@ -817,66 +815,6 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                v.AddArg2(v0, v3)
                return true
        }
-       // match: (Div64u x (Const64 [c]))
-       // cond: c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul
-       // result: (Add64 (Add64 <typ.UInt64> (Add64 <typ.UInt64> (Lsh64x64 <typ.UInt64> (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))) (Const64 <typ.UInt64> [32])) (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)])))) (Mul64 <typ.UInt64> (ZeroExt32to64 <typ.UInt64> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))) (Const64 <typ.UInt64> [int64((1<<32)/c)]))) (ZeroExt32to64 (Div32u <typ.UInt32> (Add32 <typ.UInt32> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)])) (Mul32 <typ.UInt32> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)])) (Const32 <typ.UInt32> [int32((1<<32)%c)]))) (Const32 <typ.UInt32> [int32(c)]))))
-       for {
-               x := v_0
-               if v_1.Op != OpConst64 {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               if !(c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul) {
-                       break
-               }
-               v.reset(OpAdd64)
-               v0 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64)
-               v1 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64)
-               v2 := b.NewValue0(v.Pos, OpLsh64x64, typ.UInt64)
-               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v4 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32)
-               v6 := b.NewValue0(v.Pos, OpRsh64Ux64, typ.UInt64)
-               v7 := b.NewValue0(v.Pos, OpConst64, typ.UInt64)
-               v7.AuxInt = int64ToAuxInt(32)
-               v6.AddArg2(x, v7)
-               v5.AddArg(v6)
-               v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v8.AuxInt = int32ToAuxInt(int32(c))
-               v4.AddArg2(v5, v8)
-               v3.AddArg(v4)
-               v2.AddArg2(v3, v7)
-               v9 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v10 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
-               v11 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32)
-               v11.AddArg(x)
-               v10.AddArg2(v11, v8)
-               v9.AddArg(v10)
-               v1.AddArg2(v2, v9)
-               v12 := b.NewValue0(v.Pos, OpMul64, typ.UInt64)
-               v13 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v14 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32)
-               v14.AddArg2(v5, v8)
-               v13.AddArg(v14)
-               v15 := b.NewValue0(v.Pos, OpConst64, typ.UInt64)
-               v15.AuxInt = int64ToAuxInt(int64((1 << 32) / c))
-               v12.AddArg2(v13, v15)
-               v0.AddArg2(v1, v12)
-               v16 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v17 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
-               v18 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
-               v19 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32)
-               v19.AddArg2(v11, v8)
-               v20 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
-               v21 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v21.AuxInt = int32ToAuxInt(int32((1 << 32) % c))
-               v20.AddArg2(v14, v21)
-               v18.AddArg2(v19, v20)
-               v17.AddArg2(v18, v8)
-               v16.AddArg(v17)
-               v.AddArg2(v0, v16)
-               return true
-       }
        return false
 }
 func rewriteValuedivmod_OpDiv8(v *Value) bool {
@@ -982,35 +920,6 @@ func rewriteValuedivmod_OpDiv8u(v *Value) bool {
        }
        return false
 }
-func rewriteValuedivmod_OpMod32u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Mod32u <t> x (Const32 [c]))
-       // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c)
-       // result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
-       for {
-               t := v.Type
-               x := v_0
-               if v_1.Op != OpConst32 {
-                       break
-               }
-               c := auxIntToInt32(v_1.AuxInt)
-               if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) {
-                       break
-               }
-               v.reset(OpSub32)
-               v0 := b.NewValue0(v.Pos, OpMul32, t)
-               v1 := b.NewValue0(v.Pos, OpDiv32u, t)
-               v2 := b.NewValue0(v.Pos, OpConst32, t)
-               v2.AuxInt = int32ToAuxInt(c)
-               v1.AddArg2(x, v2)
-               v0.AddArg2(v1, v2)
-               v.AddArg2(x, v0)
-               return true
-       }
-       return false
-}
 func rewriteBlockdivmod(b *Block) bool {
        return false
 }
index 891f017d7ba5d2a24c5dbe22bf8d0742c6b13b98..fd5139c0bbdaeea2473908ee58914aa3f9c6c527 100644 (file)
@@ -14724,7 +14724,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool {
                return true
        }
        // match: (Mod16u <t> x (Const16 [c]))
-       // cond: x.Op != OpConst16 && c > 0 && umagicOK16(c)
+       // cond: x.Op != OpConst16 && c != 0
        // result: (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
        for {
                t := v.Type
@@ -14733,7 +14733,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool {
                        break
                }
                c := auxIntToInt16(v_1.AuxInt)
-               if !(x.Op != OpConst16 && c > 0 && umagicOK16(c)) {
+               if !(x.Op != OpConst16 && c != 0) {
                        break
                }
                v.reset(OpSub16)
@@ -14878,7 +14878,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool {
                return true
        }
        // match: (Mod32u <t> x (Const32 [c]))
-       // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c)
+       // cond: x.Op != OpConst32 && c != 0
        // result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
        for {
                t := v.Type
@@ -14887,7 +14887,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool {
                        break
                }
                c := auxIntToInt32(v_1.AuxInt)
-               if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) {
+               if !(x.Op != OpConst32 && c != 0) {
                        break
                }
                v.reset(OpSub32)
@@ -15043,7 +15043,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
                return true
        }
        // match: (Mod64u <t> x (Const64 [c]))
-       // cond: x.Op != OpConst64 && c > 0 && umagicOK64(c)
+       // cond: x.Op != OpConst64 && c != 0
        // result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
        for {
                t := v.Type
@@ -15052,7 +15052,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
                        break
                }
                c := auxIntToInt64(v_1.AuxInt)
-               if !(x.Op != OpConst64 && c > 0 && umagicOK64(c)) {
+               if !(x.Op != OpConst64 && c != 0) {
                        break
                }
                v.reset(OpSub64)
@@ -15197,7 +15197,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                return true
        }
        // match: (Mod8u <t> x (Const8 [c]))
-       // cond: x.Op != OpConst8 && c > 0 && umagicOK8( c)
+       // cond: x.Op != OpConst8 && c != 0
        // result: (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
        for {
                t := v.Type
@@ -15206,7 +15206,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                        break
                }
                c := auxIntToInt8(v_1.AuxInt)
-               if !(x.Op != OpConst8 && c > 0 && umagicOK8(c)) {
+               if !(x.Op != OpConst8 && c != 0) {
                        break
                }
                v.reset(OpSub8)
index 06887c934e7ba5b12badd874984dbe4d5efefdf5..bf9e71c1701d0816259d0fc192f7006cfa6e98be 100644 (file)
@@ -1223,7 +1223,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
                },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64)
+               sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.PPC64, sys.S390X, sys.MIPS64, sys.MIPS, sys.RISCV64, sys.Loong64)
        alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
        alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...)
        addF("math/bits", "Add64",
index 5a4e577fb6d5ded98f15d78aad39f81dafb74814..9311f843454c362c14b49c4998ef1f99f97ffb9b 100644 (file)
@@ -33,6 +33,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"386", "internal/runtime/sys", "TrailingZeros64"}:                 struct{}{},
        {"386", "internal/runtime/sys", "TrailingZeros8"}:                  struct{}{},
        {"386", "math", "sqrt"}:                                            struct{}{},
+       {"386", "math/bits", "Mul64"}:                                      struct{}{},
        {"386", "math/bits", "ReverseBytes32"}:                             struct{}{},
        {"386", "math/bits", "ReverseBytes64"}:                             struct{}{},
        {"386", "math/bits", "TrailingZeros16"}:                            struct{}{},
@@ -208,6 +209,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"arm", "math/bits", "Len32"}:                                      struct{}{},
        {"arm", "math/bits", "Len64"}:                                      struct{}{},
        {"arm", "math/bits", "Len8"}:                                       struct{}{},
+       {"arm", "math/bits", "Mul64"}:                                      struct{}{},
        {"arm", "math/bits", "ReverseBytes32"}:                             struct{}{},
        {"arm", "math/bits", "ReverseBytes64"}:                             struct{}{},
        {"arm", "math/bits", "RotateLeft32"}:                               struct{}{},
@@ -557,6 +559,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"mips", "math/bits", "Len32"}:                                     struct{}{},
        {"mips", "math/bits", "Len64"}:                                     struct{}{},
        {"mips", "math/bits", "Len8"}:                                      struct{}{},
+       {"mips", "math/bits", "Mul64"}:                                     struct{}{},
        {"mips", "math/bits", "TrailingZeros16"}:                           struct{}{},
        {"mips", "math/bits", "TrailingZeros32"}:                           struct{}{},
        {"mips", "math/bits", "TrailingZeros64"}:                           struct{}{},
@@ -806,6 +809,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
        {"mipsle", "math/bits", "Len32"}:                                   struct{}{},
        {"mipsle", "math/bits", "Len64"}:                                   struct{}{},
        {"mipsle", "math/bits", "Len8"}:                                    struct{}{},
+       {"mipsle", "math/bits", "Mul64"}:                                   struct{}{},
        {"mipsle", "math/bits", "TrailingZeros16"}:                         struct{}{},
        {"mipsle", "math/bits", "TrailingZeros32"}:                         struct{}{},
        {"mipsle", "math/bits", "TrailingZeros64"}:                         struct{}{},
index b9e226b20741fcc3d09d75aebb721bda5232007f..989ae0a1db21246bf8a59f133fcecb43d7223821 100644 (file)
@@ -704,27 +704,21 @@ func walkDivMod(n *ir.BinaryExpr, init *ir.Nodes) ir.Node {
        // runtime calls late in SSA processing.
        if types.RegSize < 8 && (et == types.TINT64 || et == types.TUINT64) {
                if n.Y.Op() == ir.OLITERAL {
-                       // Leave div/mod by constant powers of 2 or small 16-bit constants.
+                       // Leave div/mod by non-zero int64 and uint64 constants.
                        // The SSA backend will handle those.
+                       // (Zero constants should have been rejected already, but we check just in case.)
                        switch et {
                        case types.TINT64:
-                               c := ir.Int64Val(n.Y)
-                               if c < 0 {
-                                       c = -c
-                               }
-                               if c != 0 && c&(c-1) == 0 {
+                               if ir.Int64Val(n.Y) != 0 {
                                        return n
                                }
                        case types.TUINT64:
-                               c := ir.Uint64Val(n.Y)
-                               if c < 1<<16 {
-                                       return n
-                               }
-                               if c != 0 && c&(c-1) == 0 {
+                               if ir.Uint64Val(n.Y) != 0 {
                                        return n
                                }
                        }
                }
+               // Build call to uint64div, uint64mod, int64div, or int64mod.
                var fn string
                if et == types.TINT64 {
                        fn = "int64"
index d0aad08849635137fe22963f0b6afce919396ef6..348880f622f933c237137bebed5eea66610f9636 100644 (file)
@@ -167,7 +167,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.Op386SBBL:
                opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
 
-       case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
+       case ssa.Op386ADDLcarry, ssa.Op386ADCLcarry, ssa.Op386SUBLcarry:
                // output 0 is the low 32 bits, output 1 is carry/borrow.
                opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())
 
index 3a78180817f324c53981a2ea0863d7acdc51b042..98d0852398c4372d4ab262b3e1c69228031e3b85 100644 (file)
@@ -279,7 +279,10 @@ func div3_uint32(i uint32) uint32 {
 }
 
 func div3_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1431655766"
+       // 386: "MULL"
+       // 386: "SHRL [$]1"
+       // 386: -".*CALL"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "UMULH"
        // arm64: "LSR [$]1,"
@@ -308,7 +311,10 @@ func div14_uint32(i uint32) uint32 {
 }
 
 func div14_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1840700270,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: -".*CALL"
        // arm64: "MOVD [$]-7905747460161236406,"
        // arm64: "UMULH"
        // arm64: "LSR [$]2,"
@@ -343,7 +349,10 @@ func div7_uint32(i uint32) uint32 {
 }
 
 func div7_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1840700269,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: -".*CALL"
        // arm64: "MOVD [$]2635249153387078803,"
        // arm64: "UMULH"
        // arm64: "SUB",
@@ -353,7 +362,11 @@ func div7_uint64(i uint64) uint64 {
 }
 
 func div12345_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1444876402,"
+       // 386: "MOVL [$]835683390,"
+       // 386: "MULL"
+       // 386: "SHRL [$]13,"
+       // 386: "SHLL [$]19,"
        // arm64: "MOVD [$]-6205696892516465602,"
        // arm64: "UMULH"
        // arm64: "LSR [$]13,"
@@ -869,7 +882,12 @@ func ndivis6_int32(i int32) bool {
 }
 
 func divis6_int64(i int64) bool {
-       // 386 "CALL"
+       // 386: "IMUL3L [$]-1431655766,"
+       // 386: "IMUL3L [$]-1431655765,"
+       // 386: "ADCL [$]715827882,"
+       // 386: "CMPL .*, [$]715827882"
+       // 386: "CMPL .*, [$]-1431655766"
+       // 386: "SETLS"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "MUL "
        // arm64: "MOVD [$]3074457345618258602,"
@@ -880,7 +898,12 @@ func divis6_int64(i int64) bool {
 }
 
 func ndivis6_int64(i int64) bool {
-       // 386 "CALL"
+       // 386: "IMUL3L [$]-1431655766,"
+       // 386: "IMUL3L [$]-1431655765,"
+       // 386: "ADCL [$]715827882,"
+       // 386: "CMPL .*, [$]715827882"
+       // 386: "CMPL .*, [$]-1431655766"
+       // 386: "SETHI"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "MUL "
        // arm64: "MOVD [$]3074457345618258602,"
@@ -973,7 +996,14 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) {
 }
 
 func div_divis6_uint64(i uint64) (uint64, bool) {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: "SHLL [$]30,"
+       // 386: "SETEQ"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "UMULH"
        // arm64: "LSR [$]2,"
@@ -983,7 +1013,14 @@ func div_divis6_uint64(i uint64) (uint64, bool) {
 }
 
 func div_ndivis6_uint64(i uint64) (uint64, bool) {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: "SHLL [$]30,"
+       // 386: "SETNE"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "UMULH"
        // arm64: "LSR [$]2,"
@@ -1091,7 +1128,16 @@ func div_ndivis6_int32(i int32) (int32, bool) {
 }
 
 func div_divis6_int64(i int64) (int64, bool) {
-       // 386 "CALL"
+       // 386: "ANDL [$]-1431655766,"
+       // 386: "ANDL [$]-1431655765,"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "SUBL" "SBBL"
+       // 386: "MULL"
+       // 386: "SETEQ"
+       // 386: -"SET(LS|HI)"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "SMULH"
        // arm64: "ADD"
@@ -1103,7 +1149,16 @@ func div_divis6_int64(i int64) (int64, bool) {
 }
 
 func div_ndivis6_int64(i int64) (int64, bool) {
-       // 386 "CALL"
+       // 386: "ANDL [$]-1431655766,"
+       // 386: "ANDL [$]-1431655765,"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "SUBL" "SBBL"
+       // 386: "MULL"
+       // 386: "SETNE"
+       // 386: -"SET(LS|HI)"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
        // arm64: "MOVD [$]-6148914691236517205,"
        // arm64: "SMULH"
        // arm64: "ADD"