One could expect calls like
z.mant.shl(z.mant, shiftAmount)
(or higher-level-functions calls that use lhs/rhs) to be almost free
when shiftAmount = 0; and expect calls like
z.mant.shl(x.mant, 0)
to have the same cost of a x.mant -> z.mant copy. Neither of this
things are currently true.
For an 800 words nat, the first kind of calls cost ~800ns for rigth
shifts and ~3.5µs for left shift; while the second kind of calls are
doing more work than necessary by calling shlVU/shrVU.
This change makes the first kind of calls ({Shl,Shr}Same) almost free,
and the second kind of calls ({Shl,Shr}) about 30% faster.
name old time/op new time/op delta
ZeroShifts/Shl-4 3.64µs ± 3% 2.49µs ± 1% -31.55% (p=0.000 n=10+10)
ZeroShifts/ShlSame-4 3.65µs ± 1% 0.01µs ± 1% -99.85% (p=0.000 n=9+9)
ZeroShifts/Shr-4 3.65µs ± 1% 2.49µs ± 1% -31.91% (p=0.000 n=10+10)
ZeroShifts/ShrSame-4 825ns ± 0% 6ns ± 1% -99.33% (p=0.000 n=9+10)
During go test math/big, the shl zeroshift fastpath is triggered 1380
times; while the shr fastpath is triggered 153334 times(!).
Change-Id: I5f92b304a40638bd8453a86c87c58e54b337bcdf
Reviewed-on: https://go-review.googlesource.com/87660
Run-TryBot: Alberto Donizetti <alb.donizetti@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
return i*_W + uint(bits.TrailingZeros(uint(x[i])))
}
+func same(x, y nat) bool {
+ return len(x) == len(y) && len(x) > 0 && &x[0] == &y[0]
+}
+
// z = x << s
func (z nat) shl(x nat, s uint) nat {
+ if s == 0 {
+ if same(z, x) {
+ return z
+ }
+ if !alias(z, x) {
+ return z.set(x)
+ }
+ }
+
m := len(x)
if m == 0 {
return z[:0]
// z = x >> s
func (z nat) shr(x nat, s uint) nat {
+ if s == 0 {
+ if same(z, x) {
+ return z
+ }
+ if !alias(z, x) {
+ return z.set(x)
+ }
+ }
+
m := len(x)
n := m - int(s/_W)
if n <= 0 {
}
}
+func BenchmarkZeroShifts(b *testing.B) {
+ x := rndNat(800)
+
+ b.Run("Shl", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var z nat
+ z.shl(x, 0)
+ }
+ })
+ b.Run("ShlSame", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ x.shl(x, 0)
+ }
+ })
+
+ b.Run("Shr", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ var z nat
+ z.shr(x, 0)
+ }
+ })
+ b.Run("ShrSame", func(b *testing.B) {
+ for i := 0; i < b.N; i++ {
+ x.shr(x, 0)
+ }
+ })
+}
+
type modWTest struct {
in string
dividend string