]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add intrinsic HasCPUFeature for checking cpu features
authorJosh Bleecher Snyder <josharian@gmail.com>
Thu, 19 Dec 2019 18:58:28 +0000 (10:58 -0800)
committerJosh Bleecher Snyder <josharian@gmail.com>
Sat, 4 Apr 2020 01:01:04 +0000 (01:01 +0000)
Before using some CPU instructions, we must check for their presence.
We use global variables in the runtime package to record features.

Prior to this CL, we issued a regular memory load for these features.
The downside to this is that, because it is a regular memory load,
it cannot be hoisted out of loops or otherwise reordered with other loads.

This CL introduces a new intrinsic just for checking cpu features.
It still ends up resulting in a memory load, but that memory load can
now be floated to the entry block and rematerialized as needed.

One downside is that the regular load could be combined with the comparison
into a CMPBconstload+NE. This new intrinsic cannot; it generates MOVB+TESTB+NE.
(It is possible that MOVBQZX+TESTQ+NE would be better.)

This CL does only amd64. It is easy to extend to other architectures.

For the benchmark in #36196, on my machine, this offers a mild speedup.

name      old time/op  new time/op  delta
FMA-8     1.39ns ± 6%  1.29ns ± 9%  -7.19%  (p=0.000 n=97+96)
NonFMA-8  2.03ns ±11%  2.04ns ±12%    ~     (p=0.618 n=99+98)

Updates #15808
Updates #36196

Change-Id: I75e2fcfcf5a6df1bdb80657a7143bed69fca6deb
Reviewed-on: https://go-review.googlesource.com/c/go/+/212360
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Giovanni Bajo <rasky@develer.com>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/gen/genericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/mathbits.go

index 210ac130928bafeb9c4561f14265a3e800b6eb20..4ce81592f47b98640dfefc2f3ef4d192a193c880 100644 (file)
@@ -902,6 +902,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[0].Reg()
                gc.AddrAuto(&p.To, v)
+       case ssa.OpAMD64LoweredHasCPUFeature:
+               p := s.Prog(x86.AMOVB)
+               p.From.Type = obj.TYPE_MEM
+               gc.AddAux(&p.From, v)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
        case ssa.OpAMD64LoweredGetClosurePtr:
                // Closure pointer is DX.
                gc.CheckLoweredGetClosurePtr(v)
index 00587aa3bfcaeccb8ac5b3c8bec2c4d364eaa94e..d423c3268da5d3c9d2d355cc9737763df4e8fc9e 100644 (file)
@@ -3595,8 +3595,7 @@ func init() {
                                s.vars[n] = s.load(types.Types[TFLOAT64], a)
                                return s.variable(n, types.Types[TFLOAT64])
                        }
-                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasFMA, s.sb)
-                       v := s.load(types.Types[TBOOL], addr)
+                       v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[TBOOL], x86HasFMA)
                        b := s.endBlock()
                        b.Kind = ssa.BlockIf
                        b.SetControl(v)
@@ -3661,8 +3660,7 @@ func init() {
 
        makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasSSE41, s.sb)
-                       v := s.load(types.Types[TBOOL], addr)
+                       v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[TBOOL], x86HasSSE41)
                        b := s.endBlock()
                        b.Kind = ssa.BlockIf
                        b.SetControl(v)
@@ -3869,8 +3867,7 @@ func init() {
 
        makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-                       addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasPOPCNT, s.sb)
-                       v := s.load(types.Types[TBOOL], addr)
+                       v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[TBOOL], x86HasPOPCNT)
                        b := s.endBlock()
                        b.Kind = ssa.BlockIf
                        b.SetControl(v)
index 2c9fe4a59bd6cb0d580805271ac0515a3ebf47eb..7a2c148699b2926d1e4621e0d5085d2b9cc3fc56 100644 (file)
 (GetClosurePtr ...) -> (LoweredGetClosurePtr ...)
 (GetCallerPC ...) -> (LoweredGetCallerPC ...)
 (GetCallerSP ...) -> (LoweredGetCallerSP ...)
+(HasCPUFeature ...) -> (LoweredHasCPUFeature ...)
 (Addr ...) -> (LEAQ ...)
 (LocalAddr {sym} base _) -> (LEAQ {sym} base)
 
index bf949abc2007d826606a7985f45b68378f7272ae..b32f12341803ad246fda71ec85262b5d00dabcd7 100644 (file)
@@ -738,6 +738,8 @@ func init() {
                // It saves all GP registers if necessary, but may clobber others.
                {name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), buildReg("AX CX DX BX BP SI R8 R9")}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
 
+               {name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "bool", aux: "Sym", symEffect: "None"},
+
                // There are three of these functions so that they can have three different register inputs.
                // When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
                // default registers to match so we don't need to copy registers around unnecessarily.
index 2892a0b3cf60e198469b10dd5769939f361c1423..15acbf5b428af732a1e1a64874336a3d12ba9805 100644 (file)
@@ -378,6 +378,8 @@ var genericOps = []opData{
        // arch-dependent), and is not a safe-point.
        {name: "WB", argLength: 3, typ: "Mem", aux: "Sym", symEffect: "None"}, // arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 
+       {name: "HasCPUFeature", argLength: 0, typ: "bool", aux: "Sym", symEffect: "None"}, // aux=place that this feature flag can be loaded from
+
        // PanicBounds and PanicExtend generate a runtime panic.
        // Their arguments provide index values to use in panic messages.
        // Both PanicBounds and PanicExtend have an AuxInt value from the BoundsKind type (in ../op.go).
index bf48bff8f127a8302148fb0937f57abcf7356692..e8d1b841c8eaf543b7998f5542ce1f0d99962b8a 100644 (file)
@@ -885,6 +885,7 @@ const (
        OpAMD64LoweredGetCallerSP
        OpAMD64LoweredNilCheck
        OpAMD64LoweredWB
+       OpAMD64LoweredHasCPUFeature
        OpAMD64LoweredPanicBoundsA
        OpAMD64LoweredPanicBoundsB
        OpAMD64LoweredPanicBoundsC
@@ -2596,6 +2597,7 @@ const (
        OpMoveWB
        OpZeroWB
        OpWB
+       OpHasCPUFeature
        OpPanicBounds
        OpPanicExtend
        OpClosureCall
@@ -11650,6 +11652,18 @@ var opcodeTable = [...]opInfo{
                        clobbers: 4294901760, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
                },
        },
+       {
+               name:              "LoweredHasCPUFeature",
+               auxType:           auxSym,
+               argLen:            0,
+               rematerializeable: true,
+               symEffect:         SymNone,
+               reg: regInfo{
+                       outputs: []outputInfo{
+                               {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+                       },
+               },
+       },
        {
                name:    "LoweredPanicBoundsA",
                auxType: auxInt64,
@@ -32979,6 +32993,13 @@ var opcodeTable = [...]opInfo{
                symEffect: SymNone,
                generic:   true,
        },
+       {
+               name:      "HasCPUFeature",
+               auxType:   auxSym,
+               argLen:    0,
+               symEffect: SymNone,
+               generic:   true,
+       },
        {
                name:    "PanicBounds",
                auxType: auxInt64,
index d6ea57d6496596790558ad33a124e17fe4cf77ef..1f147c9eb5c87896362b20708b189c09febd57ba 100644 (file)
@@ -786,6 +786,9 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpGreater32F(v)
        case OpGreater64F:
                return rewriteValueAMD64_OpGreater64F(v)
+       case OpHasCPUFeature:
+               v.Op = OpAMD64LoweredHasCPUFeature
+               return true
        case OpHmul32:
                v.Op = OpAMD64HMULL
                return true
index e405d6b1d2e40359cc1544a089ae9692a80318d9..8bd6242b1eedb571930cd3086c92ab48da0a1f98 100644 (file)
@@ -110,8 +110,9 @@ func Len8(n uint8) int {
 //    bits.OnesCount    //
 // -------------------- //
 
+// amd64:".*x86HasPOPCNT"
 func OnesCount(n uint) int {
-       // amd64:"POPCNTQ",".*x86HasPOPCNT"
+       // amd64:"POPCNTQ"
        // arm64:"VCNT","VUADDLV"
        // s390x:"POPCNT"
        // ppc64:"POPCNTD"
@@ -120,8 +121,9 @@ func OnesCount(n uint) int {
        return bits.OnesCount(n)
 }
 
+// amd64:".*x86HasPOPCNT"
 func OnesCount64(n uint64) int {
-       // amd64:"POPCNTQ",".*x86HasPOPCNT"
+       // amd64:"POPCNTQ"
        // arm64:"VCNT","VUADDLV"
        // s390x:"POPCNT"
        // ppc64:"POPCNTD"
@@ -130,8 +132,9 @@ func OnesCount64(n uint64) int {
        return bits.OnesCount64(n)
 }
 
+// amd64:".*x86HasPOPCNT"
 func OnesCount32(n uint32) int {
-       // amd64:"POPCNTL",".*x86HasPOPCNT"
+       // amd64:"POPCNTL"
        // arm64:"VCNT","VUADDLV"
        // s390x:"POPCNT"
        // ppc64:"POPCNTW"
@@ -140,8 +143,9 @@ func OnesCount32(n uint32) int {
        return bits.OnesCount32(n)
 }
 
+// amd64:".*x86HasPOPCNT"
 func OnesCount16(n uint16) int {
-       // amd64:"POPCNTL",".*x86HasPOPCNT"
+       // amd64:"POPCNTL"
        // arm64:"VCNT","VUADDLV"
        // s390x:"POPCNT"
        // ppc64:"POPCNTW"