]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify math/big.mulWW on ppc64x
authorCarlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Tue, 16 Oct 2018 01:29:05 +0000 (22:29 -0300)
committerLynn Boger <laboger@linux.vnet.ibm.com>
Mon, 22 Oct 2018 13:03:28 +0000 (13:03 +0000)
This change implements mulWW as an intrinsic for ppc64x. Performance
numbers below:

name                            old time/op    new time/op    delta
QuoRem                            4.54µs ±45%    3.22µs ± 0%  -29.22%  (p=0.029 n=4+4)
ModSqrt225_Tonelli                 765µs ± 3%     757µs ± 0%   -1.02%  (p=0.029 n=4+4)
ModSqrt225_3Mod4                   231µs ± 0%     231µs ± 0%   -0.10%  (p=0.029 n=4+4)
ModSqrt231_Tonelli                 789µs ± 0%     788µs ± 0%   -0.14%  (p=0.029 n=4+4)
ModSqrt231_5Mod8                   267µs ± 0%     267µs ± 0%   -0.13%  (p=0.029 n=4+4)
Sqrt                              49.5µs ±17%    45.3µs ± 0%   -8.48%  (p=0.029 n=4+4)
IntSqr/1                          32.2ns ±22%    24.2ns ± 0%  -24.79%  (p=0.029 n=4+4)
IntSqr/2                          60.6ns ± 0%    60.9ns ± 0%   +0.50%  (p=0.029 n=4+4)
IntSqr/3                          82.8ns ± 0%    83.3ns ± 0%   +0.51%  (p=0.029 n=4+4)
IntSqr/5                           122ns ± 0%     121ns ± 0%   -1.22%  (p=0.029 n=4+4)
IntSqr/8                           227ns ± 0%     226ns ± 0%   -0.44%  (p=0.029 n=4+4)
IntSqr/10                          300ns ± 0%     298ns ± 0%   -0.67%  (p=0.029 n=4+4)
IntSqr/20                         1.02µs ± 0%    0.89µs ± 0%  -13.08%  (p=0.029 n=4+4)
IntSqr/30                         1.73µs ± 0%    1.51µs ± 0%  -12.73%  (p=0.029 n=4+4)
IntSqr/50                         3.69µs ± 1%    3.29µs ± 0%  -10.70%  (p=0.029 n=4+4)
IntSqr/80                         7.64µs ± 0%    7.04µs ± 0%   -7.91%  (p=0.029 n=4+4)
IntSqr/100                        11.1µs ± 0%    10.3µs ± 0%   -7.04%  (p=0.029 n=4+4)
IntSqr/200                        37.9µs ± 0%    36.4µs ± 0%   -4.13%  (p=0.029 n=4+4)
IntSqr/300                        69.4µs ± 0%    66.0µs ± 0%   -4.94%  (p=0.029 n=4+4)
IntSqr/500                         174µs ± 0%     168µs ± 0%   -3.10%  (p=0.029 n=4+4)
IntSqr/800                         347µs ± 0%     333µs ± 0%   -4.06%  (p=0.029 n=4+4)
IntSqr/1000                        524µs ± 0%     507µs ± 0%   -3.21%  (p=0.029 n=4+4)

Change-Id: If067452f5b6579ad3a2e9daa76a7ffe6fceae1bb
Reviewed-on: https://go-review.googlesource.com/c/143217
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Giovanni Bajo <rasky@develer.com>
src/cmd/compile/internal/gc/ssa.go

index cecba59b0a6a0d6515ea5d635fb2c3607094049d..2ce59097af36a4c9b063ca42ce2707991880d9be 100644 (file)
@@ -3499,7 +3499,7 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
                },
-               sys.ArchAMD64, sys.ArchARM64)
+               sys.ArchAMD64, sys.ArchARM64, sys.ArchPPC64LE, sys.ArchPPC64)
        add("math/big", "divWW",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])