From: Brian Kessler Date: Wed, 24 Oct 2018 02:54:56 +0000 (-0600) Subject: cmd/compile: intrinsify math/bits.Div on amd64 X-Git-Tag: go1.12beta1~264 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=319787a528284aefe23424056a19bda71f7cc2b1;p=gostls13.git cmd/compile: intrinsify math/bits.Div on amd64 Note that the intrinsic implementation panics separately for overflow and divide by zero, which matches the behavior of the pure go implementation. There is a modest performance improvement after intrinsic implementation. name old time/op new time/op delta Div-4 53.0ns ± 1% 47.0ns ± 0% -11.28% (p=0.008 n=5+5) Div32-4 18.4ns ± 0% 18.5ns ± 1% ~ (p=0.444 n=5+5) Div64-4 53.3ns ± 0% 47.5ns ± 4% -10.77% (p=0.008 n=5+5) Updates #28273 Change-Id: Ic1688ecc0964acace2e91bf44ef16f5fb6b6bc82 Reviewed-on: https://go-review.googlesource.com/c/144378 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/gc/go.go b/src/cmd/compile/internal/gc/go.go index cb76398629..c5ff8b6dbe 100644 --- a/src/cmd/compile/internal/gc/go.go +++ b/src/cmd/compile/internal/gc/go.go @@ -300,6 +300,7 @@ var ( panicdottypeI, panicindex, panicnildottype, + panicoverflow, panicslice, raceread, racereadrange, diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index e0b4b40323..51fd589db9 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -82,6 +82,7 @@ func initssaconfig() { panicdottypeI = sysfunc("panicdottypeI") panicindex = sysfunc("panicindex") panicnildottype = sysfunc("panicnildottype") + panicoverflow = sysfunc("panicoverflow") panicslice = sysfunc("panicslice") raceread = sysfunc("raceread") racereadrange = sysfunc("racereadrange") @@ -3487,20 +3488,29 @@ func init() { }, sys.AMD64, sys.ARM64, sys.PPC64) alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64) - addF("math/bits", "Add64", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2]) }, sys.AMD64) alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64) - addF("math/bits", "Sub64", func(s *state, n *Node, args []*ssa.Value) *ssa.Value { return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2]) }, sys.AMD64) alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64) + addF("math/bits", "Div64", + func(s *state, n *Node, args []*ssa.Value) *ssa.Value { + // check for divide-by-zero/overflow and panic with appropriate message + cmpZero := s.newValue2(s.ssaOp(ONE, types.Types[TUINT64]), types.Types[TBOOL], args[2], s.zeroVal(types.Types[TUINT64])) + s.check(cmpZero, panicdivide) + cmpOverflow := s.newValue2(s.ssaOp(OLT, types.Types[TUINT64]), types.Types[TBOOL], args[0], args[2]) + s.check(cmpOverflow, panicoverflow) + return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2]) + }, + sys.AMD64) + alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64) /******** sync/atomic ********/ diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go index 85d5bdea33..44ab2c02b7 100644 --- a/test/codegen/mathbits.go +++ b/test/codegen/mathbits.go @@ -465,3 +465,17 @@ func Mul64(x, y uint64) (hi, lo uint64) { // ppc64le:"MULHDU","MULLD" return bits.Mul64(x, y) } + +// --------------- // +// bits.Div* // +// --------------- // + +func Div(hi, lo, x uint) (q, r uint) { + // amd64:"DIVQ" + return bits.Div(hi, lo, x) +} + +func Div64(hi, lo, x uint64) (q, r uint64) { + // amd64:"DIVQ" + return bits.Div64(hi, lo, x) +}