Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify math/bits.Div on amd64
author Brian Kessler <brian.m.kessler@gmail.com>
Wed, 24 Oct 2018 02:54:56 +0000 (20:54 -0600)
committer Keith Randall <khr@golang.org>
Tue, 27 Nov 2018 05:04:25 +0000 (05:04 +0000)
Note that the intrinsic implementation panics separately for overflow and
divide by zero, which matches the behavior of the pure Go implementation.
There is a modest performance improvement with the intrinsic implementation.

name     old time/op  new time/op  delta
Div-4    53.0ns ± 1%  47.0ns ± 0%  -11.28%  (p=0.008 n=5+5)
Div32-4  18.4ns ± 0%  18.5ns ± 1%     ~     (p=0.444 n=5+5)
Div64-4  53.3ns ± 0%  47.5ns ± 4%  -10.77%  (p=0.008 n=5+5)

Updates #28273

Change-Id: Ic1688ecc0964acace2e91bf44ef16f5fb6b6bc82
Reviewed-on: https://go-review.googlesource.com/c/144378
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/gc/go.go
src/cmd/compile/internal/gc/ssa.go
test/codegen/mathbits.go

index cb76398629742559ddfeff20312a42fe4f8b4fa6..c5ff8b6dbe865bb9af10a5d13c363fcaa04f3325 100644 (file)
@@ -300,6 +300,7 @@ var (
        panicdottypeI,
        panicindex,
        panicnildottype,
+       panicoverflow,
        panicslice,
        raceread,
        racereadrange,
index e0b4b403232157e09b9b60c5bd7f3669b01ed8a6..51fd589db99f32ca21abe0c49c3fc8152d563768 100644 (file)
@@ -82,6 +82,7 @@ func initssaconfig() {
        panicdottypeI = sysfunc("panicdottypeI")
        panicindex = sysfunc("panicindex")
        panicnildottype = sysfunc("panicnildottype")
+       panicoverflow = sysfunc("panicoverflow")
        panicslice = sysfunc("panicslice")
        raceread = sysfunc("raceread")
        racereadrange = sysfunc("racereadrange")
@@ -3487,20 +3488,29 @@ func init() {
                },
                sys.AMD64, sys.ARM64, sys.PPC64)
        alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64)
-
        addF("math/bits", "Add64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
                },
                sys.AMD64)
        alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64)
-
        addF("math/bits", "Sub64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
                },
                sys.AMD64)
        alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64)
+       addF("math/bits", "Div64",
+               func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+                       // check for divide-by-zero/overflow and panic with appropriate message
+                       cmpZero := s.newValue2(s.ssaOp(ONE, types.Types[TUINT64]), types.Types[TBOOL], args[2], s.zeroVal(types.Types[TUINT64]))
+                       s.check(cmpZero, panicdivide)
+                       cmpOverflow := s.newValue2(s.ssaOp(OLT, types.Types[TUINT64]), types.Types[TBOOL], args[0], args[2])
+                       s.check(cmpOverflow, panicoverflow)
+                       return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
+               },
+               sys.AMD64)
+       alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)
 
        /******** sync/atomic ********/
 
index 85d5bdea331bf8938b38391bc82aacd456b33c40..44ab2c02b757837fc6accd93b8ea8537a40dc02f 100644 (file)
@@ -465,3 +465,17 @@ func Mul64(x, y uint64) (hi, lo uint64) {
        // ppc64le:"MULHDU","MULLD"
        return bits.Mul64(x, y)
 }
+
+// --------------- //
+//    bits.Div*    //
+// --------------- //
+
+func Div(hi, lo, x uint) (q, r uint) { // Codegen check that bits.Div compiles to a DIVQ instruction on amd64.
+       // amd64:"DIVQ"
+       return bits.Div(hi, lo, x)
+}
+
+func Div64(hi, lo, x uint64) (q, r uint64) { // Codegen check that bits.Div64 compiles to a DIVQ instruction on amd64.
+       // amd64:"DIVQ"
+       return bits.Div64(hi, lo, x)
+}