cmd/compile: implement bits.Mul64 on 32-bit systems

author Russ Cox <rsc@golang.org>

Mon, 27 Oct 2025 23:41:39 +0000 (19:41 -0400)

committer Gopher Robot <gobot@golang.org>

Thu, 30 Oct 2025 15:04:20 +0000 (08:04 -0700)
author Russ Cox <rsc@golang.org>
Mon, 27 Oct 2025 23:41:39 +0000 (19:41 -0400)
committer Gopher Robot <gobot@golang.org>
Thu, 30 Oct 2025 15:04:20 +0000 (08:04 -0700)
diff --git a/src/cmd/compile/internal/arm/ssa.go b/src/cmd/compile/internal/arm/ssa.go

index a3bfb491b8b25f8df8e745eb5dce8e9ee72feadd..b31ffa474bc6d3066005f3c6556f58e4b0e423ba 100644 (file)
--- a/src/cmd/compile/internal/arm/ssa.go
+++ b/src/cmd/compile/internal/arm/ssa.go
@@ -245,6 +245,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                 p.To.Type = obj.TYPE_REG
                 p.To.Reg = r
         case ssa.OpARMADDS,
+               ssa.OpARMADCS,
                 ssa.OpARMSUBS:
                 r := v.Reg0()
                 r1 := v.Args[0].Reg()
diff --git a/src/cmd/compile/internal/ssa/_gen/386.rules b/src/cmd/compile/internal/ssa/_gen/386.rules

index 5f1150241929eb8d3958d8b7d8c8fd841636ab8a..cbe56f7579e6958f2f6d3c03e439dac5d05d2302 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/386.rules
+++ b/src/cmd/compile/internal/ssa/_gen/386.rules
@@ -7,6 +7,7 @@
  (Add(32|64)F ...) => (ADDS(S|D) ...)
  (Add32carry ...) => (ADDLcarry ...)
  (Add32withcarry ...) => (ADCL ...)
+(Add32carrywithcarry ...) => (ADCLcarry ...)
  
  (Sub(Ptr|32|16|8) ...) => (SUBL ...)
  (Sub(32|64)F ...) => (SUBS(S|D) ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/386Ops.go b/src/cmd/compile/internal/ssa/_gen/386Ops.go

index 60599a33abb587f22597717e4c240baec3e48ecd..09bfc4226ff8f5a941ba379419392e71591ab199 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/386Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/386Ops.go
@@ -90,22 +90,23 @@ func init() {
  
         // Common regInfo
         var (
-               gp01      = regInfo{inputs: nil, outputs: gponly}
-               gp11      = regInfo{inputs: []regMask{gp}, outputs: gponly}
-               gp11sp    = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
-               gp11sb    = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
-               gp21      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-               gp11carry = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
-               gp21carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
-               gp1carry1 = regInfo{inputs: []regMask{gp}, outputs: gponly}
-               gp2carry1 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
-               gp21sp    = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
-               gp21sb    = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
-               gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
-               gp11div   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx}
-               gp21hmul  = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
-               gp11mod   = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
-               gp21mul   = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
+               gp01           = regInfo{inputs: nil, outputs: gponly}
+               gp11           = regInfo{inputs: []regMask{gp}, outputs: gponly}
+               gp11sp         = regInfo{inputs: []regMask{gpsp}, outputs: gponly}
+               gp11sb         = regInfo{inputs: []regMask{gpspsb}, outputs: gponly}
+               gp21           = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
+               gp11carry      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp, 0}}
+               gp21carry      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
+               gp1carry1      = regInfo{inputs: []regMask{gp}, outputs: gponly}
+               gp2carry1      = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
+               gp2carry1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
+               gp21sp         = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
+               gp21sb         = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
+               gp21shift      = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
+               gp11div        = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax}, clobbers: dx}
+               gp21hmul       = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx}, clobbers: ax}
+               gp11mod        = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx}, clobbers: ax}
+               gp21mul        = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx, ax}}
  
                 gp2flags     = regInfo{inputs: []regMask{gpsp, gpsp}}
                 gp1flags     = regInfo{inputs: []regMask{gpsp}}
@@ -181,10 +182,11 @@ func init() {
                 {name: "ADDL", argLength: 2, reg: gp21sp, asm: "ADDL", commutative: true, clobberFlags: true},                // arg0 + arg1
                 {name: "ADDLconst", argLength: 1, reg: gp11sp, asm: "ADDL", aux: "Int32", typ: "UInt32", clobberFlags: true}, // arg0 + auxint
  
-               {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true},                // arg0 + arg1, generates <carry,result> pair
-               {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true},                // arg0 + auxint, generates <carry,result> pair
-               {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags
-               {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0+auxint+carry(arg1), where arg1 is flags
+               {name: "ADDLcarry", argLength: 2, reg: gp21carry, asm: "ADDL", commutative: true, resultInArg0: true},                          // arg0 + arg1, generates <carry,result> pair
+               {name: "ADDLconstcarry", argLength: 1, reg: gp11carry, asm: "ADDL", aux: "Int32", resultInArg0: true},                          // arg0 + auxint, generates <carry,result> pair
+               {name: "ADCL", argLength: 3, reg: gp2carry1, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true},           // arg0+arg1+carry(arg2), where arg2 is flags
+               {name: "ADCLcarry", argLength: 3, reg: gp2carry1carry, asm: "ADCL", commutative: true, resultInArg0: true, clobberFlags: true}, // arg0+arg1+carry(arg2), where arg2 is flags, generates <carry,result> pair
+               {name: "ADCLconst", argLength: 2, reg: gp1carry1, asm: "ADCL", aux: "Int32", resultInArg0: true, clobberFlags: true},           // arg0+auxint+carry(arg1), where arg1 is flags
  
                 {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},                    // arg0 - arg1
                 {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, // arg0 - auxint
diff --git a/src/cmd/compile/internal/ssa/_gen/ARM.rules b/src/cmd/compile/internal/ssa/_gen/ARM.rules

index 18b5d6bba6099e21c6b0c3a1448bb4067eeed477..b63ca23de14d6be7701af9e1d14a3af8c08d42cd 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/_gen/ARM.rules
@@ -6,6 +6,7 @@
  (Add(32|64)F ...) => (ADD(F|D) ...)
  (Add32carry ...) => (ADDS ...)
  (Add32withcarry ...) => (ADC ...)
+(Add32carrywithcarry ...) => (ADCS ...)
  
  (Sub(Ptr|32|16|8) ...) => (SUB ...)
  (Sub(32|64)F ...) => (SUB(F|D) ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/ARMOps.go b/src/cmd/compile/internal/ssa/_gen/ARMOps.go

index 01cd48835e2bc4cafa8cc1429d7f39b18a0f8bdf..59bb71b2e3c70a9403bc440d8338b6c7ecbddae3 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/ARMOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/ARMOps.go
@@ -102,36 +102,37 @@ func init() {
         )
         // Common regInfo
         var (
-               gp01      = regInfo{inputs: nil, outputs: []regMask{gp}}
-               gp11      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
-               gp11carry = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
-               gp11sp    = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
-               gp1flags  = regInfo{inputs: []regMask{gpg}}
-               gp1flags1 = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
-               gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
-               gp21carry = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
-               gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
-               gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
-               gp22      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
-               gp31      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gp31carry = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
-               gp3flags  = regInfo{inputs: []regMask{gp, gp, gp}}
-               gp3flags1 = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
-               gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
-               gpstore   = regInfo{inputs: []regMask{gpspsbg, gpg}}
-               gp2load   = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
-               gp2store  = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
-               fp01      = regInfo{inputs: nil, outputs: []regMask{fp}}
-               fp11      = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
-               fp1flags  = regInfo{inputs: []regMask{fp}}
-               fpgp      = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp
-               gpfp      = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")}
-               fp21      = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
-               fp31      = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
-               fp2flags  = regInfo{inputs: []regMask{fp, fp}}
-               fpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
-               fpstore   = regInfo{inputs: []regMask{gpspsbg, fp}}
-               readflags = regInfo{inputs: nil, outputs: []regMask{gp}}
+               gp01           = regInfo{inputs: nil, outputs: []regMask{gp}}
+               gp11           = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
+               gp11carry      = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
+               gp11sp         = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
+               gp1flags       = regInfo{inputs: []regMask{gpg}}
+               gp1flags1      = regInfo{inputs: []regMask{gp}, outputs: []regMask{gp}}
+               gp21           = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
+               gp21carry      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, 0}}
+               gp2flags       = regInfo{inputs: []regMask{gpg, gpg}}
+               gp2flags1      = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
+               gp2flags1carry = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp, 0}}
+               gp22           = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
+               gp31           = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
+               gp31carry      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp, 0}}
+               gp3flags       = regInfo{inputs: []regMask{gp, gp, gp}}
+               gp3flags1      = regInfo{inputs: []regMask{gp, gp, gp}, outputs: []regMask{gp}}
+               gpload         = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
+               gpstore        = regInfo{inputs: []regMask{gpspsbg, gpg}}
+               gp2load        = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
+               gp2store       = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
+               fp01           = regInfo{inputs: nil, outputs: []regMask{fp}}
+               fp11           = regInfo{inputs: []regMask{fp}, outputs: []regMask{fp}}
+               fp1flags       = regInfo{inputs: []regMask{fp}}
+               fpgp           = regInfo{inputs: []regMask{fp}, outputs: []regMask{gp}, clobbers: buildReg("F15")} // int-float conversion uses F15 as tmp
+               gpfp           = regInfo{inputs: []regMask{gp}, outputs: []regMask{fp}, clobbers: buildReg("F15")}
+               fp21           = regInfo{inputs: []regMask{fp, fp}, outputs: []regMask{fp}}
+               fp31           = regInfo{inputs: []regMask{fp, fp, fp}, outputs: []regMask{fp}}
+               fp2flags       = regInfo{inputs: []regMask{fp, fp}}
+               fpload         = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{fp}}
+               fpstore        = regInfo{inputs: []regMask{gpspsbg, fp}}
+               readflags      = regInfo{inputs: nil, outputs: []regMask{gp}}
         )
         ops := []opData{
                 // binary ops
@@ -161,16 +162,17 @@ func init() {
                         call:         false, // TODO(mdempsky): Should this be true?
                 },
  
-               {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true}, // arg0 + arg1, set carry flag
-               {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"}, // arg0 + auxInt, set carry flag
-               {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true},  // arg0 + arg1 + carry, arg2=flags
-               {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"},  // arg0 + auxInt + carry, arg1=flags
-               {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"},                    // arg0 - arg1, set carry flag
-               {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"}, // arg0 - auxInt, set carry flag
-               {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"}, // auxInt - arg0, set carry flag
-               {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                     // arg0 - arg1 - carry, arg2=flags
-               {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"},  // arg0 - auxInt - carry, arg1=flags
-               {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"},  // auxInt - arg0 - carry, arg1=flags
+               {name: "ADDS", argLength: 2, reg: gp21carry, asm: "ADD", commutative: true},      // arg0 + arg1, set carry flag
+               {name: "ADDSconst", argLength: 1, reg: gp11carry, asm: "ADD", aux: "Int32"},      // arg0 + auxInt, set carry flag
+               {name: "ADC", argLength: 3, reg: gp2flags1, asm: "ADC", commutative: true},       // arg0 + arg1 + carry, arg2=flags
+               {name: "ADCconst", argLength: 2, reg: gp1flags1, asm: "ADC", aux: "Int32"},       // arg0 + auxInt + carry, arg1=flags
+               {name: "ADCS", argLength: 3, reg: gp2flags1carry, asm: "ADC", commutative: true}, // arg0 + arg1 + carry, sets carry
+               {name: "SUBS", argLength: 2, reg: gp21carry, asm: "SUB"},                         // arg0 - arg1, set carry flag
+               {name: "SUBSconst", argLength: 1, reg: gp11carry, asm: "SUB", aux: "Int32"},      // arg0 - auxInt, set carry flag
+               {name: "RSBSconst", argLength: 1, reg: gp11carry, asm: "RSB", aux: "Int32"},      // auxInt - arg0, set carry flag
+               {name: "SBC", argLength: 3, reg: gp2flags1, asm: "SBC"},                          // arg0 - arg1 - carry, arg2=flags
+               {name: "SBCconst", argLength: 2, reg: gp1flags1, asm: "SBC", aux: "Int32"},       // arg0 - auxInt - carry, arg1=flags
+               {name: "RSCconst", argLength: 2, reg: gp1flags1, asm: "RSC", aux: "Int32"},       // auxInt - arg0 - carry, arg1=flags
  
                 {name: "MULLU", argLength: 2, reg: gp22, asm: "MULLU", commutative: true}, // arg0 * arg1, high 32 bits in out0, low 32 bits in out1
                 {name: "MULA", argLength: 3, reg: gp31, asm: "MULA"},                      // arg0 * arg1 + arg2
diff --git a/src/cmd/compile/internal/ssa/_gen/MIPS.rules b/src/cmd/compile/internal/ssa/_gen/MIPS.rules

index 80bf9017f524281441001418c874d47ce1f77859..fe1e00a4e4c6f3afa1d6864ec75959756ddf0e24 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/MIPS.rules
+++ b/src/cmd/compile/internal/ssa/_gen/MIPS.rules
@@ -9,6 +9,12 @@
  (Select1 (Add32carry <t> x y)) => (SGTU <typ.Bool> x (ADD <t.FieldType(0)> x y))
  (Add32withcarry <t> x y c) => (ADD c (ADD <t> x y))
  
+(Select0 (Add32carrywithcarry <t> x y c)) => (ADD <t.FieldType(0)> c (ADD <t.FieldType(0)> x y))
+(Select1 (Add32carrywithcarry <t> x y c)) =>
+       (OR <typ.Bool>
+               (SGTU <typ.Bool> x xy:(ADD <t.FieldType(0)> x y))
+               (SGTU <typ.Bool> xy (ADD <t.FieldType(0)> c xy)))
+
  (Sub(Ptr|32|16|8) ...) => (SUB ...)
  (Sub(32|64)F ...) => (SUB(F|D) ...)
  
diff --git a/src/cmd/compile/internal/ssa/_gen/dec64.rules b/src/cmd/compile/internal/ssa/_gen/dec64.rules

index 589c2fcfc1468d9da95c0e846acd7f56c3c41ed7..483818906e6ff8fa71fd488e2671c9eb3a3d3b35 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/dec64.rules
@@ -6,8 +6,12 @@
  // architectures. These rules work together with the decomposeBuiltin
  // pass which handles phis of these typ.
  
+(Last ___) => v.Args[len(v.Args)-1]
+
  (Int64Hi (Int64Make hi _)) => hi
  (Int64Lo (Int64Make _ lo)) => lo
+(Select0 (MakeTuple x y)) => x
+(Select1 (MakeTuple x y)) => y
  
  (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
         (Int64Make
@@ -60,30 +64,85 @@
      (Arg <typ.UInt32> {n} [off])
      (Arg <typ.UInt32> {n} [off+4]))
  
-(Add64 x y) =>
-       (Int64Make
-               (Add32withcarry <typ.Int32>
-                       (Int64Hi x)
-                       (Int64Hi y)
-                       (Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y))))
-               (Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y))))
+(Add64 <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               add: (Add32carry x0 y0)
+               (Int64Make
+                       (Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add))
+                       (Select0 <typ.UInt32> add)))
+
+(Sub64 <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               sub: (Sub32carry x0 y0)
+               (Int64Make
+                       (Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub))
+                       (Select0 <typ.UInt32> sub)))
+
+(Mul64 <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               x0y0: (Mul32uhilo x0 y0)
+               x0y0Hi: (Select0 <typ.UInt32> x0y0)
+               x0y0Lo: (Select1 <typ.UInt32> x0y0)
+               (Int64Make
+                       (Add32 <typ.UInt32> x0y0Hi
+                               (Add32 <typ.UInt32>
+                                       (Mul32 <typ.UInt32> x0 y1)
+                                       (Mul32 <typ.UInt32> x1 y0)))
+                       x0y0Lo))
+
+(Mul64uhilo <t> x y) =>
+       (Last <t>
+               x0: (Int64Lo x)
+               x1: (Int64Hi x)
+               y0: (Int64Lo y)
+               y1: (Int64Hi y)
+               x0y0: (Mul32uhilo x0 y0)
+               x0y1: (Mul32uhilo x0 y1)
+               x1y0: (Mul32uhilo x1 y0)
+               x1y1: (Mul32uhilo x1 y1)
+               x0y0Hi: (Select0 <typ.UInt32> x0y0)
+               x0y0Lo: (Select1 <typ.UInt32> x0y0)
+               x0y1Hi: (Select0 <typ.UInt32> x0y1)
+               x0y1Lo: (Select1 <typ.UInt32> x0y1)
+               x1y0Hi: (Select0 <typ.UInt32> x1y0)
+               x1y0Lo: (Select1 <typ.UInt32> x1y0)
+               x1y1Hi: (Select0 <typ.UInt32> x1y1)
+               x1y1Lo: (Select1 <typ.UInt32> x1y1)
+               w1a: (Add32carry x0y0Hi x0y1Lo)
+               w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a))
+               w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a))
+               w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a))
+               w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b))
+               w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b))
+               (MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)>
+                       (Int64Make w3b (Select0 <typ.UInt32> w2b))
+                       (Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
+
+(Hmul64u x y) => (Select0 (Mul64uhilo x y))
+
+// Hacker's Delight p. 175: signed hmul = unsigned hmul - (x<0)&y - (y<0)&x.
+(Hmul64 x y) =>
+       (Last
+               p: (Hmul64u <typ.UInt64> x y)
+               xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs)
+               ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys)
+               (Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
+
+// (x+y)/2 => (x-y)/2 + y
+(Avg64u <t> x y) => (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
  
-(Sub64 x y) =>
-       (Int64Make
-               (Sub32withcarry <typ.Int32>
-                       (Int64Hi x)
-                       (Int64Hi y)
-                       (Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y))))
-               (Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y))))
-
-(Mul64 x y) =>
-       (Int64Make
-               (Add32 <typ.UInt32>
-                       (Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y))
-                       (Add32 <typ.UInt32>
-                               (Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y))
-                               (Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y)))))
-               (Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))
  
  (And64 x y) =>
         (Int64Make
diff --git a/src/cmd/compile/internal/ssa/_gen/divmod.rules b/src/cmd/compile/internal/ssa/_gen/divmod.rules

index c7c9e132095cca9e35e1766baa9daebc6e1b8dfe..21e0a194068df8f5a76f7accb087a2566dc7f507 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/divmod.rules
+++ b/src/cmd/compile/internal/ssa/_gen/divmod.rules
@@ -118,7 +118,7 @@
        (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m/2)]))
        (Const64 <typ.UInt64> [smagic32(c).s - 1]))
      (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
-(Div64 <t> x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul =>
+(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul =>
    (Sub64 <t>
      (Rsh64x64 <t>
        (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)]))
@@ -132,7 +132,7 @@
        (Add32 <t> x (Hmul32 <t> x (Const32 <typ.UInt32> [int32(smagic32(c).m)])))
        (Const64 <typ.UInt64> [smagic32(c).s]))
      (Rsh32x64 <t> x (Const64 <typ.UInt64> [31])))
-(Div64 <t> x (Const64 [c])) && smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul =>
+(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul =>
    (Sub64 <t>
      (Rsh64x64 <t>
        (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])))
@@ -153,7 +153,7 @@
    (Rsh32Ux64 <t>
      (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(smagic32(c).m)]))
      (Const64 <typ.UInt64> [smagic32(c).s]))
-(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && t.IsSigned() && smagicOK64(c) && config.useHmul =>
    (Rsh64Ux64 <t>
      (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))
      (Const64 <typ.UInt64> [smagic64(c).s]))
@@ -185,7 +185,7 @@
    (Rsh32Ux64 <t>
      (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(1<<31 + umagic32(c).m/2)]))
      (Const64 <typ.UInt64> [umagic32(c).s - 1]))
-(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul =>
    (Rsh64Ux64 <t>
      (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)]))
      (Const64 <typ.UInt64> [umagic64(c).s - 1]))
@@ -211,7 +211,7 @@
        (Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1]))
        (Const32 <typ.UInt32> [int32(1<<31 + (umagic32(c).m+1)/2)]))
      (Const64 <typ.UInt64> [umagic32(c).s - 2]))
-(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && umagicOK64(c) && c&1 == 0 && config.useHmul =>
    (Rsh64Ux64 <t>
      (Hmul64u <typ.UInt64>
        (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1]))
@@ -237,52 +237,7 @@
    (Rsh32Ux64 <t>
      (Avg32u x (Hmul32u <typ.UInt32> x (Const32 <typ.UInt32> [int32(umagic32(c).m)])))
      (Const64 <typ.UInt64> [umagic32(c).s - 1]))
-(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul =>
+(Div64u <t> x (Const64 [c])) && umagicOK64(c) && config.useAvg && config.useHmul =>
    (Rsh64Ux64 <t>
      (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)])))
      (Const64 <typ.UInt64> [umagic64(c).s - 1]))
-
-// Case 9. For unsigned 64-bit divides on 32-bit machines,
-// if the constant fits in 16 bits (so that the last term
-// fits in 32 bits), convert to three 32-bit divides by a constant.
-//
-// If 1<<32 = Q * c + R
-// and    x = hi << 32 + lo
-//
-// Then x = (hi/c*c + hi%c) << 32 + lo
-//        = hi/c*c<<32 + hi%c<<32 + lo
-//        = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c
-//        = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c)
-// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c
-(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul =>
-  (Add64
-    (Add64 <typ.UInt64>
-      (Add64 <typ.UInt64>
-        (Lsh64x64 <typ.UInt64>
-          (ZeroExt32to64
-            (Div32u <typ.UInt32>
-              (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
-              (Const32 <typ.UInt32> [int32(c)])))
-          (Const64 <typ.UInt64> [32]))
-        (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))))
-      (Mul64 <typ.UInt64>
-        (ZeroExt32to64 <typ.UInt64>
-          (Mod32u <typ.UInt32>
-            (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
-            (Const32 <typ.UInt32> [int32(c)])))
-        (Const64 <typ.UInt64> [int64((1<<32)/c)])))
-      (ZeroExt32to64
-        (Div32u <typ.UInt32>
-          (Add32 <typ.UInt32>
-            (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))
-            (Mul32 <typ.UInt32>
-              (Mod32u <typ.UInt32>
-                (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
-                (Const32 <typ.UInt32> [int32(c)]))
-              (Const32 <typ.UInt32> [int32((1<<32)%c)])))
-          (Const32 <typ.UInt32> [int32(c)]))))
-
-// Repeated from generic.rules, for expanding the expression above
-// (which can then be further expanded to handle the nested Div32u).
-(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
-  => (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
diff --git a/src/cmd/compile/internal/ssa/_gen/generic.rules b/src/cmd/compile/internal/ssa/_gen/generic.rules

index 3f026448326005c81f11ad38696299285c86edb2..7e3aba1e5ee8d5282f018b3e7ebb3a667280a55e 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/_gen/generic.rules
@@ -1106,13 +1106,13 @@
    => (Sub32 x (Mul32 <t> (Div32  <t> x (Const32 <t> [c])) (Const32 <t> [c])))
  (Mod64  <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
    => (Sub64 x (Mul64 <t> (Div64  <t> x (Const64 <t> [c])) (Const64 <t> [c])))
-(Mod8u  <t> x (Const8  [c])) && x.Op != OpConst8  && c > 0 && umagicOK8( c)
+(Mod8u  <t> x (Const8  [c])) && x.Op != OpConst8  && c != 0
    => (Sub8  x (Mul8  <t> (Div8u  <t> x (Const8  <t> [c])) (Const8  <t> [c])))
-(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c)
+(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c != 0
    => (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
-(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
+(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c != 0
    => (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
-(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c)
+(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c != 0
    => (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
  
  // Set up for mod->mul+rot optimization in genericlateopt.rules.
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go

index 1f6ad4e16d98f8803ed54473ac360db487bb2b48..09fb4bf03f943cac38e04119c156d404e2740cbc 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -16,6 +16,9 @@ package main
  // are signed or unsigned.
  
  var genericOps = []opData{
+       // Pseudo-op.
+       {name: "Last", argLength: -1}, // returns its last argument; earlier args act as "let" bindings
+
         // 2-input arithmetic
         // Types must be consistent with Go typing. Add, for example, must take two values
         // of the same type and produces that same type.
@@ -557,8 +560,9 @@ var genericOps = []opData{
         {name: "Int64Hi", argLength: 1, typ: "UInt32"},   // high 32-bit of arg0
         {name: "Int64Lo", argLength: 1, typ: "UInt32"},   // low 32-bit of arg0
  
-       {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1, returns (value, carry)
-       {name: "Add32withcarry", argLength: 3, commutative: true},                    // arg0 + arg1 + arg2, arg2=carry (0 or 1)
+       {name: "Add32carry", argLength: 2, commutative: true, typ: "(UInt32,Flags)"},          // arg0 + arg1, returns (value, carry)
+       {name: "Add32withcarry", argLength: 3, commutative: true},                             // arg0 + arg1 + arg2, arg2=carry (0 or 1)
+       {name: "Add32carrywithcarry", argLength: 3, commutative: true, typ: "(UInt32,Flags)"}, // arg0 + arg1 + arg2, arg2=carry, returns (value, carry)
  
         {name: "Sub32carry", argLength: 2, typ: "(UInt32,Flags)"}, // arg0 - arg1, returns (value, carry)
         {name: "Sub32withcarry", argLength: 3},                    // arg0 - arg1 - arg2, arg2=carry (0 or 1)
diff --git a/src/cmd/compile/internal/ssa/_gen/rulegen.go b/src/cmd/compile/internal/ssa/_gen/rulegen.go

index f818b46511d04a08f0b4f46dad689e990935e549..e3a10707fed81d8521b9d510e09ff69db4fcb0c9 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/_gen/rulegen.go
@@ -1271,8 +1271,10 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s
         case 0:
         case 1:
                 rr.add(stmtf("%s.AddArg(%s)", v, all.String()))
-       default:
+       case 2, 3, 4, 5, 6:
                 rr.add(stmtf("%s.AddArg%d(%s)", v, len(args), all.String()))
+       default:
+               rr.add(stmtf("%s.AddArgs(%s)", v, all.String()))
         }
  
         if cse != nil {
@@ -1313,6 +1315,12 @@ outer:
                                 d++
                         case d > 0 && s[i] == close:
                                 d--
+                       case s[i] == ':':
+                               // ignore spaces after colons
+                               nonsp = true
+                               for i+1 < len(s) && (s[i+1] == ' ' || s[i+1] == '\t') {
+                                       i++
+                               }
                         default:
                                 nonsp = true
                         }
@@ -1347,7 +1355,7 @@ func extract(val string) (op, typ, auxint, aux string, args []string) {
         val = val[1 : len(val)-1] // remove ()
  
         // Split val up into regions.
-       // Split by spaces/tabs, except those contained in (), {}, [], or <>.
+       // Split by spaces/tabs, except those contained in (), {}, [], or <>, or those that directly follow a colon.
         s := split(val)
  
         // Extract restrictions and args.
@@ -1471,7 +1479,7 @@ func splitNameExpr(arg string) (name, expr string) {
                 // colon is inside the parens, such as in "(Foo x:(Bar))".
                 return "", arg
         }
-       return arg[:colon], arg[colon+1:]
+       return arg[:colon], strings.TrimSpace(arg[colon+1:])
  }
  
  func getBlockInfo(op string, arch arch) (name string, data blockData) {
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go

index 16a983a56878d033f4f0721638e924d2f6dded62..264f4b3bf378f1301e86bf19bd69baf3182f68d4 100644 (file)
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -386,6 +386,7 @@ const (
         Op386ADDLcarry
         Op386ADDLconstcarry
         Op386ADCL
+       Op386ADCLcarry
         Op386ADCLconst
         Op386SUBL
         Op386SUBLconst
@@ -1182,6 +1183,7 @@ const (
         OpARMADDSconst
         OpARMADC
         OpARMADCconst
+       OpARMADCS
         OpARMSUBS
         OpARMSUBSconst
         OpARMRSBSconst
@@ -3010,6 +3012,7 @@ const (
         OpWasmI64Rotl
         OpWasmI64Popcnt
  
+       OpLast
         OpAdd8
         OpAdd16
         OpAdd32
@@ -3336,6 +3339,7 @@ const (
         OpInt64Lo
         OpAdd32carry
         OpAdd32withcarry
+       OpAdd32carrywithcarry
         OpSub32carry
         OpSub32withcarry
         OpAdd64carry
@@ -3968,6 +3972,24 @@ var opcodeTable = [...]opInfo{
                         },
                 },
         },
+       {
+               name:         "ADCLcarry",
+               argLen:       3,
+               commutative:  true,
+               resultInArg0: true,
+               clobberFlags: true,
+               asm:          x86.AADCL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 239}, // AX CX DX BX BP SI DI
+                               {1, 239}, // AX CX DX BX BP SI DI
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 239}, // AX CX DX BX BP SI DI
+                       },
+               },
+       },
         {
                 name:         "ADCLconst",
                 auxType:      auxInt32,
@@ -15792,6 +15814,22 @@ var opcodeTable = [...]opInfo{
                         },
                 },
         },
+       {
+               name:        "ADCS",
+               argLen:      3,
+               commutative: true,
+               asm:         arm.AADC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                               {1, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
+                       },
+               },
+       },
         {
                 name:   "SUBS",
                 argLen: 2,
@@ -40672,6 +40710,11 @@ var opcodeTable = [...]opInfo{
                 },
         },
  
+       {
+               name:    "Last",
+               argLen:  -1,
+               generic: true,
+       },
         {
                 name:        "Add8",
                 argLen:      2,
@@ -42480,6 +42523,12 @@ var opcodeTable = [...]opInfo{
                 commutative: true,
                 generic:     true,
         },
+       {
+               name:        "Add32carrywithcarry",
+               argLen:      3,
+               commutative: true,
+               generic:     true,
+       },
         {
                 name:    "Sub32carry",
                 argLen:  2,
diff --git a/src/cmd/compile/internal/ssa/rewrite386.go b/src/cmd/compile/internal/ssa/rewrite386.go

index 0495438710659e78b8b472f9c2be9ec140a25067..be88dd3cddadf721e9ce8e69239ef1a248adca5d 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewrite386.go
+++ b/src/cmd/compile/internal/ssa/rewrite386.go
@@ -257,6 +257,9 @@ func rewriteValue386(v *Value) bool {
         case OpAdd32carry:
                 v.Op = Op386ADDLcarry
                 return true
+       case OpAdd32carrywithcarry:
+               v.Op = Op386ADCLcarry
+               return true
         case OpAdd32withcarry:
                 v.Op = Op386ADCL
                 return true
diff --git a/src/cmd/compile/internal/ssa/rewriteARM.go b/src/cmd/compile/internal/ssa/rewriteARM.go

index 44380cf8f57d85ba4de8c389cd76b45883fc2fd2..2a90e7b433bd6e0336192701fc622319cd9189af 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteARM.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM.go
@@ -446,6 +446,9 @@ func rewriteValueARM(v *Value) bool {
         case OpAdd32carry:
                 v.Op = OpARMADDS
                 return true
+       case OpAdd32carrywithcarry:
+               v.Op = OpARMADCS
+               return true
         case OpAdd32withcarry:
                 v.Op = OpARMADC
                 return true
diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS.go b/src/cmd/compile/internal/ssa/rewriteMIPS.go

index fda02e64d19d7085beee9e88783c5ed8ea5941fa..ff696337ef82990bf5d553a07470c33929407e23 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteMIPS.go
+++ b/src/cmd/compile/internal/ssa/rewriteMIPS.go
@@ -6562,6 +6562,23 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
                 v.AddArg2(x, y)
                 return true
         }
+       // match: (Select0 (Add32carrywithcarry <t> x y c))
+       // result: (ADD <t.FieldType(0)> c (ADD <t.FieldType(0)> x y))
+       for {
+               if v_0.Op != OpAdd32carrywithcarry {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpMIPSADD)
+               v.Type = t.FieldType(0)
+               v0 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
+               v0.AddArg2(x, y)
+               v.AddArg2(c, v0)
+               return true
+       }
         // match: (Select0 (Sub32carry <t> x y))
         // result: (SUB <t.FieldType(0)> x y)
         for {
@@ -6759,6 +6776,29 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
                 v.AddArg2(x, v0)
                 return true
         }
+       // match: (Select1 (Add32carrywithcarry <t> x y c))
+       // result: (OR <typ.Bool> (SGTU <typ.Bool> x xy:(ADD <t.FieldType(0)> x y)) (SGTU <typ.Bool> xy (ADD <t.FieldType(0)> c xy)))
+       for {
+               if v_0.Op != OpAdd32carrywithcarry {
+                       break
+               }
+               t := v_0.Type
+               c := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpMIPSOR)
+               v.Type = typ.Bool
+               v0 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool)
+               xy := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
+               xy.AddArg2(x, y)
+               v0.AddArg2(x, xy)
+               v2 := b.NewValue0(v.Pos, OpMIPSSGTU, typ.Bool)
+               v3 := b.NewValue0(v.Pos, OpMIPSADD, t.FieldType(0))
+               v3.AddArg2(c, xy)
+               v2.AddArg2(xy, v3)
+               v.AddArg2(v0, v2)
+               return true
+       }
         // match: (Select1 (Sub32carry <t> x y))
         // result: (SGTU <typ.Bool> (SUB <t.FieldType(0)> x y) x)
         for {
diff --git a/src/cmd/compile/internal/ssa/rewritedec64.go b/src/cmd/compile/internal/ssa/rewritedec64.go

index b4da78fd5229bf04d3f644679593c106af6c8f7f..a0388551b5301581ecfaf3154d2fa5f7019a8840 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewritedec64.go
+++ b/src/cmd/compile/internal/ssa/rewritedec64.go
@@ -12,6 +12,8 @@ func rewriteValuedec64(v *Value) bool {
                 return rewriteValuedec64_OpAnd64(v)
         case OpArg:
                 return rewriteValuedec64_OpArg(v)
+       case OpAvg64u:
+               return rewriteValuedec64_OpAvg64u(v)
         case OpBitLen64:
                 return rewriteValuedec64_OpBitLen64(v)
         case OpBswap64:
@@ -27,10 +29,16 @@ func rewriteValuedec64(v *Value) bool {
                 return true
         case OpEq64:
                 return rewriteValuedec64_OpEq64(v)
+       case OpHmul64:
+               return rewriteValuedec64_OpHmul64(v)
+       case OpHmul64u:
+               return rewriteValuedec64_OpHmul64u(v)
         case OpInt64Hi:
                 return rewriteValuedec64_OpInt64Hi(v)
         case OpInt64Lo:
                 return rewriteValuedec64_OpInt64Lo(v)
+       case OpLast:
+               return rewriteValuedec64_OpLast(v)
         case OpLeq64:
                 return rewriteValuedec64_OpLeq64(v)
         case OpLeq64U:
@@ -57,6 +65,8 @@ func rewriteValuedec64(v *Value) bool {
                 return rewriteValuedec64_OpLsh8x64(v)
         case OpMul64:
                 return rewriteValuedec64_OpMul64(v)
+       case OpMul64uhilo:
+               return rewriteValuedec64_OpMul64uhilo(v)
         case OpNeg64:
                 return rewriteValuedec64_OpNeg64(v)
         case OpNeq64:
@@ -101,6 +111,10 @@ func rewriteValuedec64(v *Value) bool {
                 return rewriteValuedec64_OpRsh8Ux64(v)
         case OpRsh8x64:
                 return rewriteValuedec64_OpRsh8x64(v)
+       case OpSelect0:
+               return rewriteValuedec64_OpSelect0(v)
+       case OpSelect1:
+               return rewriteValuedec64_OpSelect1(v)
         case OpSignExt16to64:
                 return rewriteValuedec64_OpSignExt16to64(v)
         case OpSignExt32to64:
@@ -133,29 +147,33 @@ func rewriteValuedec64_OpAdd64(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
         typ := &b.Func.Config.Types
-       // match: (Add64 x y)
-       // result: (Int64Make (Add32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Add32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Add32carry (Int64Lo x) (Int64Lo y))))
+       // match: (Add64 <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) add: (Add32carry x0 y0) (Int64Make (Add32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> add)) (Select0 <typ.UInt32> add)))
         for {
+               t := v.Type
                 x := v_0
                 y := v_1
-               v.reset(OpInt64Make)
-               v0 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.Int32)
-               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v5.AddArg(x)
-               v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v6.AddArg(y)
-               v4.AddArg2(v5, v6)
-               v3.AddArg(v4)
-               v0.AddArg3(v1, v2, v3)
-               v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
-               v7.AddArg(v4)
-               v.AddArg2(v0, v7)
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               add := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               add.AddArg2(x0, y0)
+               v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v6 := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v7.AddArg(add)
+               v6.AddArg3(x1, y1, v7)
+               v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v8.AddArg(add)
+               v5.AddArg2(v6, v8)
+               v.AddArg6(x0, x1, y0, y1, add, v5)
                 return true
         }
  }
@@ -268,6 +286,28 @@ func rewriteValuedec64_OpArg(v *Value) bool {
         }
         return false
  }
+func rewriteValuedec64_OpAvg64u(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Avg64u <t> x y)
+       // result: (Add64 (Rsh64Ux32 <t> (Sub64 <t> x y) (Const32 <typ.UInt32> [1])) y)
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               v.reset(OpAdd64)
+               v0 := b.NewValue0(v.Pos, OpRsh64Ux32, t)
+               v1 := b.NewValue0(v.Pos, OpSub64, t)
+               v1.AddArg2(x, y)
+               v2 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v2.AuxInt = int32ToAuxInt(1)
+               v0.AddArg2(v1, v2)
+               v.AddArg2(v0, y)
+               return true
+       }
+}
  func rewriteValuedec64_OpBitLen64(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
@@ -430,6 +470,62 @@ func rewriteValuedec64_OpEq64(v *Value) bool {
                 return true
         }
  }
+func rewriteValuedec64_OpHmul64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Hmul64 x y)
+       // result: (Last p: (Hmul64u <typ.UInt64> x y) xSign: (Int64Make xs:(Rsh32x32 <typ.UInt32> (Int64Hi x) (Const32 <typ.UInt32> [31])) xs) ySign: (Int64Make ys:(Rsh32x32 <typ.UInt32> (Int64Hi y) (Const32 <typ.UInt32> [31])) ys) (Sub64 <typ.Int64> (Sub64 <typ.Int64> p (And64 <typ.Int64> xSign y)) (And64 <typ.Int64> ySign x)))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpLast)
+               p := b.NewValue0(v.Pos, OpHmul64u, typ.UInt64)
+               p.AddArg2(x, y)
+               xSign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               xs := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
+               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v3.AddArg(x)
+               v4 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v4.AuxInt = int32ToAuxInt(31)
+               xs.AddArg2(v3, v4)
+               xSign.AddArg2(xs, xs)
+               ySign := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               ys := b.NewValue0(v.Pos, OpRsh32x32, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               v7.AddArg(y)
+               ys.AddArg2(v7, v4)
+               ySign.AddArg2(ys, ys)
+               v8 := b.NewValue0(v.Pos, OpSub64, typ.Int64)
+               v9 := b.NewValue0(v.Pos, OpSub64, typ.Int64)
+               v10 := b.NewValue0(v.Pos, OpAnd64, typ.Int64)
+               v10.AddArg2(xSign, y)
+               v9.AddArg2(p, v10)
+               v11 := b.NewValue0(v.Pos, OpAnd64, typ.Int64)
+               v11.AddArg2(ySign, x)
+               v8.AddArg2(v9, v11)
+               v.AddArg4(p, xSign, ySign, v8)
+               return true
+       }
+}
+func rewriteValuedec64_OpHmul64u(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Hmul64u x y)
+       // result: (Select0 (Mul64uhilo x y))
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpSelect0)
+               v0 := b.NewValue0(v.Pos, OpMul64uhilo, types.NewTuple(typ.UInt64, typ.UInt64))
+               v0.AddArg2(x, y)
+               v.AddArg(v0)
+               return true
+       }
+}
  func rewriteValuedec64_OpInt64Hi(v *Value) bool {
         v_0 := v.Args[0]
         // match: (Int64Hi (Int64Make hi _))
@@ -458,6 +554,14 @@ func rewriteValuedec64_OpInt64Lo(v *Value) bool {
         }
         return false
  }
+func rewriteValuedec64_OpLast(v *Value) bool {
+       // match: (Last ___)
+       // result: v.Args[len(v.Args)-1]
+       for {
+               v.copyOf(v.Args[len(v.Args)-1])
+               return true
+       }
+}
  func rewriteValuedec64_OpLeq64(v *Value) bool {
         v_1 := v.Args[1]
         v_0 := v.Args[0]
@@ -1114,35 +1218,124 @@ func rewriteValuedec64_OpMul64(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
         typ := &b.Func.Config.Types
-       // match: (Mul64 x y)
-       // result: (Int64Make (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Lo x) (Int64Hi y)) (Add32 <typ.UInt32> (Mul32 <typ.UInt32> (Int64Hi x) (Int64Lo y)) (Select0 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))) (Select1 <typ.UInt32> (Mul32uhilo (Int64Lo x) (Int64Lo y))))
+       // match: (Mul64 <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y0Hi: (Select0 <typ.UInt32> x0y0) x0y0Lo: (Select1 <typ.UInt32> x0y0) (Int64Make (Add32 <typ.UInt32> x0y0Hi (Add32 <typ.UInt32> (Mul32 <typ.UInt32> x0 y1) (Mul32 <typ.UInt32> x1 y0))) x0y0Lo))
         for {
+               t := v.Type
                 x := v_0
                 y := v_1
-               v.reset(OpInt64Make)
-               v0 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
-               v1 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
-               v2 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v2.AddArg(x)
-               v3 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v3.AddArg(y)
-               v1.AddArg2(v2, v3)
-               v4 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
-               v6 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v6.AddArg(x)
-               v7 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v7.AddArg(y)
-               v5.AddArg2(v6, v7)
-               v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
-               v9 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
-               v9.AddArg2(v2, v7)
-               v8.AddArg(v9)
-               v4.AddArg2(v5, v8)
-               v0.AddArg2(v1, v4)
-               v10 := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
-               v10.AddArg(v9)
-               v.AddArg2(v0, v10)
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x0y0.AddArg2(x0, y0)
+               x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x0y0Hi.AddArg(x0y0)
+               x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x0y0Lo.AddArg(x0y0)
+               v7 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v8 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
+               v9 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
+               v10 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
+               v10.AddArg2(x0, y1)
+               v11 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
+               v11.AddArg2(x1, y0)
+               v9.AddArg2(v10, v11)
+               v8.AddArg2(x0y0Hi, v9)
+               v7.AddArg2(v8, x0y0Lo)
+               v.AddArgs(x0, x1, y0, y1, x0y0, x0y0Hi, x0y0Lo, v7)
+               return true
+       }
+}
+func rewriteValuedec64_OpMul64uhilo(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (Mul64uhilo <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) x0y0: (Mul32uhilo x0 y0) x0y1: (Mul32uhilo x0 y1) x1y0: (Mul32uhilo x1 y0) x1y1: (Mul32uhilo x1 y1) x0y0Hi: (Select0 <typ.UInt32> x0y0) x0y0Lo: (Select1 <typ.UInt32> x0y0) x0y1Hi: (Select0 <typ.UInt32> x0y1) x0y1Lo: (Select1 <typ.UInt32> x0y1) x1y0Hi: (Select0 <typ.UInt32> x1y0) x1y0Lo: (Select1 <typ.UInt32> x1y0) x1y1Hi: (Select0 <typ.UInt32> x1y1) x1y1Lo: (Select1 <typ.UInt32> x1y1) w1a: (Add32carry x0y0Hi x0y1Lo) w2a: (Add32carrywithcarry x0y1Hi x1y0Hi (Select1 <types.TypeFlags> w1a)) w3a: (Add32withcarry <typ.UInt32> x1y1Hi (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2a)) w1b: (Add32carry x1y0Lo (Select0 <typ.UInt32> w1a)) w2b: (Add32carrywithcarry x1y1Lo (Select0 <typ.UInt32> w2a) (Select1 <types.TypeFlags> w1b)) w3b: (Add32withcarry <typ.UInt32> w3a (Const32 <typ.UInt32> [0]) (Select1 <types.TypeFlags> w2b)) (MakeTuple <types.NewTuple(typ.UInt64,typ.UInt64)> (Int64Make w3b (Select0 <typ.UInt32> w2b)) (Int64Make (Select0 <typ.UInt32> w1b) x0y0Lo)))
+       for {
+               t := v.Type
+               x := v_0
+               y := v_1
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               x0y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x0y0.AddArg2(x0, y0)
+               x0y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x0y1.AddArg2(x0, y1)
+               x1y0 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x1y0.AddArg2(x1, y0)
+               x1y1 := b.NewValue0(v.Pos, OpMul32uhilo, types.NewTuple(typ.UInt32, typ.UInt32))
+               x1y1.AddArg2(x1, y1)
+               x0y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x0y0Hi.AddArg(x0y0)
+               x0y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x0y0Lo.AddArg(x0y0)
+               x0y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x0y1Hi.AddArg(x0y1)
+               x0y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x0y1Lo.AddArg(x0y1)
+               x1y0Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x1y0Hi.AddArg(x1y0)
+               x1y0Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x1y0Lo.AddArg(x1y0)
+               x1y1Hi := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               x1y1Hi.AddArg(x1y1)
+               x1y1Lo := b.NewValue0(v.Pos, OpSelect1, typ.UInt32)
+               x1y1Lo.AddArg(x1y1)
+               w1a := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               w1a.AddArg2(x0y0Hi, x0y1Lo)
+               w2a := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v18 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v18.AddArg(w1a)
+               w2a.AddArg3(x0y1Hi, x1y0Hi, v18)
+               w3a := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
+               v20 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
+               v20.AuxInt = int32ToAuxInt(0)
+               v21 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v21.AddArg(w2a)
+               w3a.AddArg3(x1y1Hi, v20, v21)
+               w1b := b.NewValue0(v.Pos, OpAdd32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v23 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v23.AddArg(w1a)
+               w1b.AddArg2(x1y0Lo, v23)
+               w2b := b.NewValue0(v.Pos, OpAdd32carrywithcarry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               v25 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v25.AddArg(w2a)
+               v26 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v26.AddArg(w1b)
+               w2b.AddArg3(x1y1Lo, v25, v26)
+               w3b := b.NewValue0(v.Pos, OpAdd32withcarry, typ.UInt32)
+               v28 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v28.AddArg(w2b)
+               w3b.AddArg3(w3a, v20, v28)
+               v29 := b.NewValue0(v.Pos, OpMakeTuple, types.NewTuple(typ.UInt64, typ.UInt64))
+               v30 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v31 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v31.AddArg(w2b)
+               v30.AddArg2(w3b, v31)
+               v32 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v33 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v33.AddArg(w1b)
+               v32.AddArg2(v33, x0y0Lo)
+               v29.AddArg2(v30, v32)
+               v.AddArgs(x0, x1, y0, y1, x0y0, x0y1, x1y0, x1y1, x0y0Hi, x0y0Lo, x0y1Hi, x0y1Lo, x1y0Hi, x1y0Lo, x1y1Hi, x1y1Lo, w1a, w2a, w3a, w1b, w2b, w3b, v29)
                 return true
         }
  }
@@ -2705,6 +2898,34 @@ func rewriteValuedec64_OpRsh8x64(v *Value) bool {
                 return true
         }
  }
+func rewriteValuedec64_OpSelect0(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Select0 (MakeTuple x y))
+       // result: x
+       for {
+               if v_0.Op != OpMakeTuple {
+                       break
+               }
+               x := v_0.Args[0]
+               v.copyOf(x)
+               return true
+       }
+       return false
+}
+func rewriteValuedec64_OpSelect1(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Select1 (MakeTuple x y))
+       // result: y
+       for {
+               if v_0.Op != OpMakeTuple {
+                       break
+               }
+               y := v_0.Args[1]
+               v.copyOf(y)
+               return true
+       }
+       return false
+}
  func rewriteValuedec64_OpSignExt16to64(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
@@ -2815,29 +3036,33 @@ func rewriteValuedec64_OpSub64(v *Value) bool {
         v_0 := v.Args[0]
         b := v.Block
         typ := &b.Func.Config.Types
-       // match: (Sub64 x y)
-       // result: (Int64Make (Sub32withcarry <typ.Int32> (Int64Hi x) (Int64Hi y) (Select1 <types.TypeFlags> (Sub32carry (Int64Lo x) (Int64Lo y)))) (Select0 <typ.UInt32> (Sub32carry (Int64Lo x) (Int64Lo y))))
+       // match: (Sub64 <t> x y)
+       // result: (Last <t> x0: (Int64Lo x) x1: (Int64Hi x) y0: (Int64Lo y) y1: (Int64Hi y) sub: (Sub32carry x0 y0) (Int64Make (Sub32withcarry <typ.UInt32> x1 y1 (Select1 <types.TypeFlags> sub)) (Select0 <typ.UInt32> sub)))
         for {
+               t := v.Type
                 x := v_0
                 y := v_1
-               v.reset(OpInt64Make)
-               v0 := b.NewValue0(v.Pos, OpSub32withcarry, typ.Int32)
-               v1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v1.AddArg(x)
-               v2 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
-               v2.AddArg(y)
-               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
-               v4 := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
-               v5 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v5.AddArg(x)
-               v6 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
-               v6.AddArg(y)
-               v4.AddArg2(v5, v6)
-               v3.AddArg(v4)
-               v0.AddArg3(v1, v2, v3)
-               v7 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
-               v7.AddArg(v4)
-               v.AddArg2(v0, v7)
+               v.reset(OpLast)
+               v.Type = t
+               x0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               x0.AddArg(x)
+               x1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               x1.AddArg(x)
+               y0 := b.NewValue0(v.Pos, OpInt64Lo, typ.UInt32)
+               y0.AddArg(y)
+               y1 := b.NewValue0(v.Pos, OpInt64Hi, typ.UInt32)
+               y1.AddArg(y)
+               sub := b.NewValue0(v.Pos, OpSub32carry, types.NewTuple(typ.UInt32, types.TypeFlags))
+               sub.AddArg2(x0, y0)
+               v5 := b.NewValue0(v.Pos, OpInt64Make, typ.UInt64)
+               v6 := b.NewValue0(v.Pos, OpSub32withcarry, typ.UInt32)
+               v7 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v7.AddArg(sub)
+               v6.AddArg3(x1, y1, v7)
+               v8 := b.NewValue0(v.Pos, OpSelect0, typ.UInt32)
+               v8.AddArg(sub)
+               v5.AddArg2(v6, v8)
+               v.AddArg6(x0, x1, y0, y1, sub, v5)
                 return true
         }
  }
diff --git a/src/cmd/compile/internal/ssa/rewritedivmod.go b/src/cmd/compile/internal/ssa/rewritedivmod.go

index fc37d84999f477c6702c3991a198ffbfb2ead8d8..02978075a8aede5ffce009f2042482fc9ec41fbc 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewritedivmod.go
+++ b/src/cmd/compile/internal/ssa/rewritedivmod.go
@@ -20,8 +20,6 @@ func rewriteValuedivmod(v *Value) bool {
                 return rewriteValuedivmod_OpDiv8(v)
         case OpDiv8u:
                 return rewriteValuedivmod_OpDiv8u(v)
-       case OpMod32u:
-               return rewriteValuedivmod_OpMod32u(v)
         }
         return false
  }
@@ -646,7 +644,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                 return true
         }
         // match: (Div64 <t> x (Const64 [c]))
-       // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul
+       // cond: smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul
         // result: (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m/2)])) (Const64 <typ.UInt64> [smagic64(c).s - 1])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
         for {
                 t := v.Type
@@ -655,7 +653,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 == 0 && config.useHmul) {
+               if !(smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul) {
                         break
                 }
                 v.reset(OpSub64)
@@ -676,7 +674,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                 return true
         }
         // match: (Div64 <t> x (Const64 [c]))
-       // cond: smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul
+       // cond: smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul
         // result: (Sub64 <t> (Rsh64x64 <t> (Add64 <t> x (Hmul64 <t> x (Const64 <typ.UInt64> [int64(smagic64(c).m)]))) (Const64 <typ.UInt64> [smagic64(c).s])) (Rsh64x64 <t> x (Const64 <typ.UInt64> [63])))
         for {
                 t := v.Type
@@ -685,7 +683,7 @@ func rewriteValuedivmod_OpDiv64(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(smagicOK64(c) && config.RegSize == 8 && smagic64(c).m&1 != 0 && config.useHmul) {
+               if !(smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul) {
                         break
                 }
                 v.reset(OpSub64)
@@ -716,7 +714,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
         config := b.Func.Config
         typ := &b.Func.Config.Types
         // match: (Div64u <t> x (Const64 [c]))
-       // cond: t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul
+       // cond: t.IsSigned() && smagicOK64(c) && config.useHmul
         // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(smagic64(c).m)])) (Const64 <typ.UInt64> [smagic64(c).s]))
         for {
                 t := v.Type
@@ -725,7 +723,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(t.IsSigned() && smagicOK64(c) && config.RegSize == 8 && config.useHmul) {
+               if !(t.IsSigned() && smagicOK64(c) && config.useHmul) {
                         break
                 }
                 v.reset(OpRsh64Ux64)
@@ -740,7 +738,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                 return true
         }
         // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul
+       // cond: umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul
         // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(1<<63 + umagic64(c).m/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
         for {
                 t := v.Type
@@ -749,7 +747,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.RegSize == 8 && config.useHmul) {
+               if !(umagicOK64(c) && umagic64(c).m&1 == 0 && config.useHmul) {
                         break
                 }
                 v.reset(OpRsh64Ux64)
@@ -764,7 +762,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                 return true
         }
         // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul
+       // cond: umagicOK64(c) && c&1 == 0 && config.useHmul
         // result: (Rsh64Ux64 <t> (Hmul64u <typ.UInt64> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])) (Const64 <typ.UInt64> [int64(1<<63 + (umagic64(c).m+1)/2)])) (Const64 <typ.UInt64> [umagic64(c).s - 2]))
         for {
                 t := v.Type
@@ -773,7 +771,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul) {
+               if !(umagicOK64(c) && c&1 == 0 && config.useHmul) {
                         break
                 }
                 v.reset(OpRsh64Ux64)
@@ -792,7 +790,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                 return true
         }
         // match: (Div64u <t> x (Const64 [c]))
-       // cond: umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul
+       // cond: umagicOK64(c) && config.useAvg && config.useHmul
         // result: (Rsh64Ux64 <t> (Avg64u x (Hmul64u <typ.UInt64> x (Const64 <typ.UInt64> [int64(umagic64(c).m)]))) (Const64 <typ.UInt64> [umagic64(c).s - 1]))
         for {
                 t := v.Type
@@ -801,7 +799,7 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul) {
+               if !(umagicOK64(c) && config.useAvg && config.useHmul) {
                         break
                 }
                 v.reset(OpRsh64Ux64)
@@ -817,66 +815,6 @@ func rewriteValuedivmod_OpDiv64u(v *Value) bool {
                 v.AddArg2(v0, v3)
                 return true
         }
-       // match: (Div64u x (Const64 [c]))
-       // cond: c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul
-       // result: (Add64 (Add64 <typ.UInt64> (Add64 <typ.UInt64> (Lsh64x64 <typ.UInt64> (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))) (Const64 <typ.UInt64> [32])) (ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)])))) (Mul64 <typ.UInt64> (ZeroExt32to64 <typ.UInt64> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)]))) (Const64 <typ.UInt64> [int64((1<<32)/c)]))) (ZeroExt32to64 (Div32u <typ.UInt32> (Add32 <typ.UInt32> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)])) (Mul32 <typ.UInt32> (Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32]))) (Const32 <typ.UInt32> [int32(c)])) (Const32 <typ.UInt32> [int32((1<<32)%c)]))) (Const32 <typ.UInt32> [int32(c)]))))
-       for {
-               x := v_0
-               if v_1.Op != OpConst64 {
-                       break
-               }
-               c := auxIntToInt64(v_1.AuxInt)
-               if !(c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul) {
-                       break
-               }
-               v.reset(OpAdd64)
-               v0 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64)
-               v1 := b.NewValue0(v.Pos, OpAdd64, typ.UInt64)
-               v2 := b.NewValue0(v.Pos, OpLsh64x64, typ.UInt64)
-               v3 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v4 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
-               v5 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32)
-               v6 := b.NewValue0(v.Pos, OpRsh64Ux64, typ.UInt64)
-               v7 := b.NewValue0(v.Pos, OpConst64, typ.UInt64)
-               v7.AuxInt = int64ToAuxInt(32)
-               v6.AddArg2(x, v7)
-               v5.AddArg(v6)
-               v8 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v8.AuxInt = int32ToAuxInt(int32(c))
-               v4.AddArg2(v5, v8)
-               v3.AddArg(v4)
-               v2.AddArg2(v3, v7)
-               v9 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v10 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
-               v11 := b.NewValue0(v.Pos, OpTrunc64to32, typ.UInt32)
-               v11.AddArg(x)
-               v10.AddArg2(v11, v8)
-               v9.AddArg(v10)
-               v1.AddArg2(v2, v9)
-               v12 := b.NewValue0(v.Pos, OpMul64, typ.UInt64)
-               v13 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v14 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32)
-               v14.AddArg2(v5, v8)
-               v13.AddArg(v14)
-               v15 := b.NewValue0(v.Pos, OpConst64, typ.UInt64)
-               v15.AuxInt = int64ToAuxInt(int64((1 << 32) / c))
-               v12.AddArg2(v13, v15)
-               v0.AddArg2(v1, v12)
-               v16 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64)
-               v17 := b.NewValue0(v.Pos, OpDiv32u, typ.UInt32)
-               v18 := b.NewValue0(v.Pos, OpAdd32, typ.UInt32)
-               v19 := b.NewValue0(v.Pos, OpMod32u, typ.UInt32)
-               v19.AddArg2(v11, v8)
-               v20 := b.NewValue0(v.Pos, OpMul32, typ.UInt32)
-               v21 := b.NewValue0(v.Pos, OpConst32, typ.UInt32)
-               v21.AuxInt = int32ToAuxInt(int32((1 << 32) % c))
-               v20.AddArg2(v14, v21)
-               v18.AddArg2(v19, v20)
-               v17.AddArg2(v18, v8)
-               v16.AddArg(v17)
-               v.AddArg2(v0, v16)
-               return true
-       }
         return false
  }
  func rewriteValuedivmod_OpDiv8(v *Value) bool {
@@ -982,35 +920,6 @@ func rewriteValuedivmod_OpDiv8u(v *Value) bool {
         }
         return false
  }
-func rewriteValuedivmod_OpMod32u(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       // match: (Mod32u <t> x (Const32 [c]))
-       // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c)
-       // result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
-       for {
-               t := v.Type
-               x := v_0
-               if v_1.Op != OpConst32 {
-                       break
-               }
-               c := auxIntToInt32(v_1.AuxInt)
-               if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) {
-                       break
-               }
-               v.reset(OpSub32)
-               v0 := b.NewValue0(v.Pos, OpMul32, t)
-               v1 := b.NewValue0(v.Pos, OpDiv32u, t)
-               v2 := b.NewValue0(v.Pos, OpConst32, t)
-               v2.AuxInt = int32ToAuxInt(c)
-               v1.AddArg2(x, v2)
-               v0.AddArg2(v1, v2)
-               v.AddArg2(x, v0)
-               return true
-       }
-       return false
-}
  func rewriteBlockdivmod(b *Block) bool {
         return false
  }
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go

index 891f017d7ba5d2a24c5dbe22bf8d0742c6b13b98..fd5139c0bbdaeea2473908ee58914aa3f9c6c527 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -14724,7 +14724,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool {
                 return true
         }
         // match: (Mod16u <t> x (Const16 [c]))
-       // cond: x.Op != OpConst16 && c > 0 && umagicOK16(c)
+       // cond: x.Op != OpConst16 && c != 0
         // result: (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
         for {
                 t := v.Type
@@ -14733,7 +14733,7 @@ func rewriteValuegeneric_OpMod16u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt16(v_1.AuxInt)
-               if !(x.Op != OpConst16 && c > 0 && umagicOK16(c)) {
+               if !(x.Op != OpConst16 && c != 0) {
                         break
                 }
                 v.reset(OpSub16)
@@ -14878,7 +14878,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool {
                 return true
         }
         // match: (Mod32u <t> x (Const32 [c]))
-       // cond: x.Op != OpConst32 && c > 0 && umagicOK32(c)
+       // cond: x.Op != OpConst32 && c != 0
         // result: (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
         for {
                 t := v.Type
@@ -14887,7 +14887,7 @@ func rewriteValuegeneric_OpMod32u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt32(v_1.AuxInt)
-               if !(x.Op != OpConst32 && c > 0 && umagicOK32(c)) {
+               if !(x.Op != OpConst32 && c != 0) {
                         break
                 }
                 v.reset(OpSub32)
@@ -15043,7 +15043,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
                 return true
         }
         // match: (Mod64u <t> x (Const64 [c]))
-       // cond: x.Op != OpConst64 && c > 0 && umagicOK64(c)
+       // cond: x.Op != OpConst64 && c != 0
         // result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
         for {
                 t := v.Type
@@ -15052,7 +15052,7 @@ func rewriteValuegeneric_OpMod64u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt64(v_1.AuxInt)
-               if !(x.Op != OpConst64 && c > 0 && umagicOK64(c)) {
+               if !(x.Op != OpConst64 && c != 0) {
                         break
                 }
                 v.reset(OpSub64)
@@ -15197,7 +15197,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                 return true
         }
         // match: (Mod8u <t> x (Const8 [c]))
-       // cond: x.Op != OpConst8 && c > 0 && umagicOK8( c)
+       // cond: x.Op != OpConst8 && c != 0
         // result: (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
         for {
                 t := v.Type
@@ -15206,7 +15206,7 @@ func rewriteValuegeneric_OpMod8u(v *Value) bool {
                         break
                 }
                 c := auxIntToInt8(v_1.AuxInt)
-               if !(x.Op != OpConst8 && c > 0 && umagicOK8(c)) {
+               if !(x.Op != OpConst8 && c != 0) {
                         break
                 }
                 v.reset(OpSub8)
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go

index 06887c934e7ba5b12badd874984dbe4d5efefdf5..bf9e71c1701d0816259d0fc192f7006cfa6e98be 100644 (file)
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1223,7 +1223,7 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
                 func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
                         return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
                 },
-               sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64)
+               sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.PPC64, sys.S390X, sys.MIPS64, sys.MIPS, sys.RISCV64, sys.Loong64)
         alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
         alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...)
         addF("math/bits", "Add64",
diff --git a/src/cmd/compile/internal/ssagen/intrinsics_test.go b/src/cmd/compile/internal/ssagen/intrinsics_test.go

index 5a4e577fb6d5ded98f15d78aad39f81dafb74814..9311f843454c362c14b49c4998ef1f99f97ffb9b 100644 (file)
--- a/src/cmd/compile/internal/ssagen/intrinsics_test.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics_test.go
@@ -33,6 +33,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
         {"386", "internal/runtime/sys", "TrailingZeros64"}:                 struct{}{},
         {"386", "internal/runtime/sys", "TrailingZeros8"}:                  struct{}{},
         {"386", "math", "sqrt"}:                                            struct{}{},
+       {"386", "math/bits", "Mul64"}:                                      struct{}{},
         {"386", "math/bits", "ReverseBytes32"}:                             struct{}{},
         {"386", "math/bits", "ReverseBytes64"}:                             struct{}{},
         {"386", "math/bits", "TrailingZeros16"}:                            struct{}{},
@@ -208,6 +209,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
         {"arm", "math/bits", "Len32"}:                                      struct{}{},
         {"arm", "math/bits", "Len64"}:                                      struct{}{},
         {"arm", "math/bits", "Len8"}:                                       struct{}{},
+       {"arm", "math/bits", "Mul64"}:                                      struct{}{},
         {"arm", "math/bits", "ReverseBytes32"}:                             struct{}{},
         {"arm", "math/bits", "ReverseBytes64"}:                             struct{}{},
         {"arm", "math/bits", "RotateLeft32"}:                               struct{}{},
@@ -557,6 +559,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
         {"mips", "math/bits", "Len32"}:                                     struct{}{},
         {"mips", "math/bits", "Len64"}:                                     struct{}{},
         {"mips", "math/bits", "Len8"}:                                      struct{}{},
+       {"mips", "math/bits", "Mul64"}:                                     struct{}{},
         {"mips", "math/bits", "TrailingZeros16"}:                           struct{}{},
         {"mips", "math/bits", "TrailingZeros32"}:                           struct{}{},
         {"mips", "math/bits", "TrailingZeros64"}:                           struct{}{},
@@ -806,6 +809,7 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
         {"mipsle", "math/bits", "Len32"}:                                   struct{}{},
         {"mipsle", "math/bits", "Len64"}:                                   struct{}{},
         {"mipsle", "math/bits", "Len8"}:                                    struct{}{},
+       {"mipsle", "math/bits", "Mul64"}:                                   struct{}{},
         {"mipsle", "math/bits", "TrailingZeros16"}:                         struct{}{},
         {"mipsle", "math/bits", "TrailingZeros32"}:                         struct{}{},
         {"mipsle", "math/bits", "TrailingZeros64"}:                         struct{}{},
diff --git a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go

index b9e226b20741fcc3d09d75aebb721bda5232007f..989ae0a1db21246bf8a59f133fcecb43d7223821 100644 (file)
--- a/src/cmd/compile/internal/walk/expr.go
+++ b/src/cmd/compile/internal/walk/expr.go
@@ -704,27 +704,21 @@ func walkDivMod(n *ir.BinaryExpr, init *ir.Nodes) ir.Node {
         // runtime calls late in SSA processing.
         if types.RegSize < 8 && (et == types.TINT64 || et == types.TUINT64) {
                 if n.Y.Op() == ir.OLITERAL {
-                       // Leave div/mod by constant powers of 2 or small 16-bit constants.
+                       // Leave div/mod by non-zero int64 or uint64 constants.
                         // The SSA backend will handle those.
+                       // (Zero constants should have been rejected already, but we check just in case.)
                         switch et {
                         case types.TINT64:
-                               c := ir.Int64Val(n.Y)
-                               if c < 0 {
-                                       c = -c
-                               }
-                               if c != 0 && c&(c-1) == 0 {
+                               if ir.Int64Val(n.Y) != 0 {
                                         return n
                                 }
                         case types.TUINT64:
-                               c := ir.Uint64Val(n.Y)
-                               if c < 1<<16 {
-                                       return n
-                               }
-                               if c != 0 && c&(c-1) == 0 {
+                               if ir.Uint64Val(n.Y) != 0 {
                                         return n
                                 }
                         }
                 }
+               // Build call to uint64div, uint64mod, int64div, or int64mod.
                 var fn string
                 if et == types.TINT64 {
                         fn = "int64"
diff --git a/src/cmd/compile/internal/x86/ssa.go b/src/cmd/compile/internal/x86/ssa.go

index d0aad08849635137fe22963f0b6afce919396ef6..348880f622f933c237137bebed5eea66610f9636 100644 (file)
--- a/src/cmd/compile/internal/x86/ssa.go
+++ b/src/cmd/compile/internal/x86/ssa.go
@@ -167,7 +167,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                 ssa.Op386SBBL:
                 opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
  
-       case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
+       case ssa.Op386ADDLcarry, ssa.Op386ADCLcarry, ssa.Op386SUBLcarry:
                 // output 0 is the low 32 bits, output 1 is carry/borrow.
                 opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())
  
diff --git a/test/codegen/divmod.go b/test/codegen/divmod.go

index 3a78180817f324c53981a2ea0863d7acdc51b042..98d0852398c4372d4ab262b3e1c69228031e3b85 100644 (file)
--- a/test/codegen/divmod.go
+++ b/test/codegen/divmod.go
@@ -279,7 +279,10 @@ func div3_uint32(i uint32) uint32 {
  }
  
  func div3_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1431655766"
+       // 386: "MULL"
+       // 386: "SHRL [$]1"
+       // 386: -".*CALL"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "UMULH"
         // arm64: "LSR [$]1,"
@@ -308,7 +311,10 @@ func div14_uint32(i uint32) uint32 {
  }
  
  func div14_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1840700270,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: -".*CALL"
         // arm64: "MOVD [$]-7905747460161236406,"
         // arm64: "UMULH"
         // arm64: "LSR [$]2,"
@@ -343,7 +349,10 @@ func div7_uint32(i uint32) uint32 {
  }
  
  func div7_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1840700269,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: -".*CALL"
         // arm64: "MOVD [$]2635249153387078803,"
         // arm64: "UMULH"
         // arm64: "SUB",
@@ -353,7 +362,11 @@ func div7_uint64(i uint64) uint64 {
  }
  
  func div12345_uint64(i uint64) uint64 {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1444876402,"
+       // 386: "MOVL [$]835683390,"
+       // 386: "MULL"
+       // 386: "SHRL [$]13,"
+       // 386: "SHLL [$]19,"
         // arm64: "MOVD [$]-6205696892516465602,"
         // arm64: "UMULH"
         // arm64: "LSR [$]13,"
@@ -869,7 +882,12 @@ func ndivis6_int32(i int32) bool {
  }
  
  func divis6_int64(i int64) bool {
-       // 386 "CALL"
+       // 386: "IMUL3L [$]-1431655766,"
+       // 386: "IMUL3L [$]-1431655765,"
+       // 386: "ADCL [$]715827882,"
+       // 386: "CMPL .*, [$]715827882"
+       // 386: "CMPL .*, [$]-1431655766"
+       // 386: "SETLS"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "MUL "
         // arm64: "MOVD [$]3074457345618258602,"
@@ -880,7 +898,12 @@ func divis6_int64(i int64) bool {
  }
  
  func ndivis6_int64(i int64) bool {
-       // 386 "CALL"
+       // 386: "IMUL3L [$]-1431655766,"
+       // 386: "IMUL3L [$]-1431655765,"
+       // 386: "ADCL [$]715827882,"
+       // 386: "CMPL .*, [$]715827882"
+       // 386: "CMPL .*, [$]-1431655766"
+       // 386: "SETHI"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "MUL "
         // arm64: "MOVD [$]3074457345618258602,"
@@ -973,7 +996,14 @@ func div_ndivis6_uint32(i uint32) (uint32, bool) {
  }
  
  func div_divis6_uint64(i uint64) (uint64, bool) {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: "SHLL [$]30,"
+       // 386: "SETEQ"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "UMULH"
         // arm64: "LSR [$]2,"
@@ -983,7 +1013,14 @@ func div_divis6_uint64(i uint64) (uint64, bool) {
  }
  
  func div_ndivis6_uint64(i uint64) (uint64, bool) {
-       // 386 "CALL"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "MULL"
+       // 386: "SHRL [$]2,"
+       // 386: "SHLL [$]30,"
+       // 386: "SETNE"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "UMULH"
         // arm64: "LSR [$]2,"
@@ -1091,7 +1128,16 @@ func div_ndivis6_int32(i int32) (int32, bool) {
  }
  
  func div_divis6_int64(i int64) (int64, bool) {
-       // 386 "CALL"
+       // 386: "ANDL [$]-1431655766,"
+       // 386: "ANDL [$]-1431655765,"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "SUBL" "SBBL"
+       // 386: "MULL"
+       // 386: "SETEQ"
+       // 386: -"SET(LS|HI)"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "SMULH"
         // arm64: "ADD"
@@ -1103,7 +1149,16 @@ func div_divis6_int64(i int64) (int64, bool) {
  }
  
  func div_ndivis6_int64(i int64) (int64, bool) {
-       // 386 "CALL"
+       // 386: "ANDL [$]-1431655766,"
+       // 386: "ANDL [$]-1431655765,"
+       // 386: "MOVL [$]-1431655766,"
+       // 386: "MOVL [$]-1431655765,"
+       // 386: "SUBL" "SBBL"
+       // 386: "MULL"
+       // 386: "SETNE"
+       // 386: -"SET(LS|HI)"
+       // 386: -".*CALL"
+       // 386: -"RO[RL]"
         // arm64: "MOVD [$]-6148914691236517205,"
         // arm64: "SMULH"
         // arm64: "ADD"
author	Russ Cox <rsc@golang.org>
	Mon, 27 Oct 2025 23:41:39 +0000 (19:41 -0400)
committer	Gopher Robot <gobot@golang.org>
	Thu, 30 Oct 2025 15:04:20 +0000 (08:04 -0700)
src/cmd/compile/internal/arm/ssa.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/386.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/386Ops.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/ARM.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/ARMOps.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/MIPS.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/dec64.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/divmod.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/generic.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/genericOps.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/rulegen.go		patch \| blob \| history
src/cmd/compile/internal/ssa/opGen.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewrite386.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteARM.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteMIPS.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewritedec64.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewritedivmod.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewritegeneric.go		patch \| blob \| history
src/cmd/compile/internal/ssagen/intrinsics.go		patch \| blob \| history
src/cmd/compile/internal/ssagen/intrinsics_test.go		patch \| blob \| history
src/cmd/compile/internal/walk/expr.go		patch \| blob \| history
src/cmd/compile/internal/x86/ssa.go		patch \| blob \| history
test/codegen/divmod.go		patch \| blob \| history