Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: use BMI1 instructions for GOAMD64=v3 and higher
author: Matthew Dempsky <mdempsky@google.com>
Sat, 3 Jul 2021 04:02:30 +0000 (21:02 -0700)
committer: Matthew Dempsky <mdempsky@google.com>
Wed, 22 Sep 2021 00:15:27 +0000 (00:15 +0000)
BMI1 includes four instructions (ANDN, BLSI, BLSMSK, BLSR) that are
easy to peephole optimize, and which GCC always seems to favor using
when available and applicable.

Updates #45453.

Change-Id: I0274184057058f5c579e5bc3ea9c414396d3cf46
Reviewed-on: https://go-review.googlesource.com/c/go/+/351130
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Trust: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/gen/AMD64.rules
src/cmd/compile/internal/ssa/gen/AMD64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/bmi.go [new file with mode: 0644]

index 30131bd5590ee8292e8de10b7fa12eff1b520995..68266d35d67a95f12e93edbc55fb7c4e7c1d0122 100644 (file)
@@ -263,6 +263,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                p.To.Reg = lo
                p.SetFrom3Reg(hi)
 
+       case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
+               ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
+               ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[0].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+
+       case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[0].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+               p.SetFrom3Reg(v.Args[1].Reg())
+
        case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
                // Arg[0] (the dividend) is in AX.
                // Arg[1] (the divisor) can be in any other register.
index bfed3bc7fdaa0340958598f7feb3bf00b8398c80..edb1a4869a34c55c9ce61d1941d831da9487f1e4 100644 (file)
 
 // Recognize bit clearing: a &^= 1<<b
 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
+(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
 (ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
     => (BTRQconst [int8(log32(^c))] x)
 (ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
 // Prefetch instructions
 (PrefetchCache ...)   => (PrefetchT0 ...)
 (PrefetchCacheStreamed ...) => (PrefetchNTA ...)
+
+// CPUID feature: BMI1.
+(AND(Q|L) x (NOT(Q|L) y))           && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y)
+(AND(Q|L) x (NEG(Q|L) x))           && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x)
+(XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x)
+(AND(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSR(Q|L) x)
index 51cbf5f78a215ec21318d6a83177cf41c7818aa0..6e4c514bd02b845c572c97c7784e27d0e89eb4bc 100644 (file)
@@ -908,6 +908,16 @@ func init() {
                // Do prefetch arg0 address. arg0=addr, arg1=memory. Instruction variant selects locality hint
                {name: "PrefetchT0", argLength: 2, reg: prefreg, asm: "PREFETCHT0", hasSideEffects: true},
                {name: "PrefetchNTA", argLength: 2, reg: prefreg, asm: "PREFETCHNTA", hasSideEffects: true},
+
+               // CPUID feature: BMI1.
+               {name: "ANDNQ", argLength: 2, reg: gp21, asm: "ANDNQ", clobberFlags: true},     // arg0 &^ arg1
+               {name: "ANDNL", argLength: 2, reg: gp21, asm: "ANDNL", clobberFlags: true},     // arg0 &^ arg1
+               {name: "BLSIQ", argLength: 1, reg: gp11, asm: "BLSIQ", clobberFlags: true},     // arg0 & -arg0
+               {name: "BLSIL", argLength: 1, reg: gp11, asm: "BLSIL", clobberFlags: true},     // arg0 & -arg0
+               {name: "BLSMSKQ", argLength: 1, reg: gp11, asm: "BLSMSKQ", clobberFlags: true}, // arg0 ^ (arg0 - 1)
+               {name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1)
+               {name: "BLSRQ", argLength: 1, reg: gp11, asm: "BLSRQ", clobberFlags: true},     // arg0 & (arg0 - 1)
+               {name: "BLSRL", argLength: 1, reg: gp11, asm: "BLSRL", clobberFlags: true},     // arg0 & (arg0 - 1)
        }
 
        var AMD64blocks = []blockData{
index ceb0a24285e85a8fdcf82c8b0cd243b9a2c43c05..fed3bc338644d4402df4ad5b7d0826da0245b01e 100644 (file)
@@ -1033,6 +1033,14 @@ const (
        OpAMD64ORLlock
        OpAMD64PrefetchT0
        OpAMD64PrefetchNTA
+       OpAMD64ANDNQ
+       OpAMD64ANDNL
+       OpAMD64BLSIQ
+       OpAMD64BLSIL
+       OpAMD64BLSMSKQ
+       OpAMD64BLSMSKL
+       OpAMD64BLSRQ
+       OpAMD64BLSRL
 
        OpARMADD
        OpARMADDconst
@@ -13628,6 +13636,120 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "ANDNQ",
+               argLen:       2,
+               clobberFlags: true,
+               asm:          x86.AANDNQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "ANDNL",
+               argLen:       2,
+               clobberFlags: true,
+               asm:          x86.AANDNL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "BLSIQ",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABLSIQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "BLSIL",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABLSIL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "BLSMSKQ",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABLSMSKQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "BLSMSKL",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABLSMSKL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "BLSRQ",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABLSRQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
+       {
+               name:         "BLSRL",
+               argLen:       1,
+               clobberFlags: true,
+               asm:          x86.ABLSRL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
 
        {
                name:        "ADD",
index e20161c920923724949a4bec144a5d97c551e35f..906260fb141291fca41b7389ea16791d278c0718 100644 (file)
@@ -3,6 +3,7 @@
 
 package ssa
 
+import "internal/buildcfg"
 import "math"
 import "cmd/internal/obj"
 import "cmd/compile/internal/types"
@@ -53,6 +54,10 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64ANDLload(v)
        case OpAMD64ANDLmodify:
                return rewriteValueAMD64_OpAMD64ANDLmodify(v)
+       case OpAMD64ANDNL:
+               return rewriteValueAMD64_OpAMD64ANDNL(v)
+       case OpAMD64ANDNQ:
+               return rewriteValueAMD64_OpAMD64ANDNQ(v)
        case OpAMD64ANDQ:
                return rewriteValueAMD64_OpAMD64ANDQ(v)
        case OpAMD64ANDQconst:
@@ -2759,6 +2764,55 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
                }
                break
        }
+       // match: (ANDL x (NOTL y))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (ANDNL x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64NOTL {
+                               continue
+                       }
+                       y := v_1.Args[0]
+                       if !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64ANDNL)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (ANDL x (NEGL x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSIL x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64NEGL || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSIL)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
+       // match: (ANDL x (ADDLconst [-1] x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSRL x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSRL)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
@@ -3037,6 +3091,48 @@ func rewriteValueAMD64_OpAMD64ANDLmodify(v *Value) bool {
        }
        return false
 }
+// rewriteValueAMD64_OpAMD64ANDNL tries to simplify the ANDNL value v in place.
+// When v's second argument is a SHLL whose shifted operand is the constant 1,
+// i.e. v computes x &^ (1 << y), v is reset to BTRL with arguments (x, y) and
+// the function returns true. Otherwise v is left untouched and it returns false.
+func rewriteValueAMD64_OpAMD64ANDNL(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ANDNL x (SHLL (MOVLconst [1]) y))
+       // result: (BTRL x y)
+       // The loop body runs at most once; break abandons this match attempt.
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64SHLL {
+                       break
+               }
+               y := v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               // The value being shifted must be the 32-bit constant 1.
+               if v_1_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0.AuxInt) != 1 {
+                       break
+               }
+               // Matched: turn v into BTRL x y.
+               v.reset(OpAMD64BTRL)
+               v.AddArg2(x, y)
+               return true
+       }
+       return false
+}
+// rewriteValueAMD64_OpAMD64ANDNQ tries to simplify the ANDNQ value v in place.
+// When v's second argument is a SHLQ whose shifted operand is the constant 1,
+// i.e. v computes x &^ (1 << y), v is reset to BTRQ with arguments (x, y) and
+// the function returns true. Otherwise v is left untouched and it returns false.
+func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (ANDNQ x (SHLQ (MOVQconst [1]) y))
+       // result: (BTRQ x y)
+       // The loop body runs at most once; break abandons this match attempt.
+       for {
+               x := v_0
+               if v_1.Op != OpAMD64SHLQ {
+                       break
+               }
+               y := v_1.Args[1]
+               v_1_0 := v_1.Args[0]
+               // The value being shifted must be the 64-bit constant 1.
+               if v_1_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0.AuxInt) != 1 {
+                       break
+               }
+               // Matched: turn v into BTRQ x y.
+               v.reset(OpAMD64BTRQ)
+               v.AddArg2(x, y)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -3138,6 +3234,55 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
                }
                break
        }
+       // match: (ANDQ x (NOTQ y))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (ANDNQ x y)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64NOTQ {
+                               continue
+                       }
+                       y := v_1.Args[0]
+                       if !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64ANDNQ)
+                       v.AddArg2(x, y)
+                       return true
+               }
+               break
+       }
+       // match: (ANDQ x (NEGQ x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSIQ x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64NEGQ || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSIQ)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
+       // match: (ANDQ x (ADDQconst [-1] x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSRQ x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSRQ)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
@@ -26474,6 +26619,21 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
                }
                break
        }
+       // match: (XORL x (ADDLconst [-1] x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSMSKL x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSMSKL)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
@@ -26950,6 +27110,21 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
                }
                break
        }
+       // match: (XORQ x (ADDQconst [-1] x))
+       // cond: buildcfg.GOAMD64 >= 3
+       // result: (BLSMSKQ x)
+       for {
+               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+                       x := v_0
+                       if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+                               continue
+                       }
+                       v.reset(OpAMD64BLSMSKQ)
+                       v.AddArg(x)
+                       return true
+               }
+               break
+       }
        return false
 }
 func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
diff --git a/test/codegen/bmi.go b/test/codegen/bmi.go
new file mode 100644 (file)
index 0000000..0c25e0b
--- /dev/null
@@ -0,0 +1,47 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+// andn64 returns x with every bit that is set in y cleared (x &^ y).
+// The asmcheck directive in the body expects an ANDNQ instruction when
+// compiling for amd64/v3.
+func andn64(x, y int64) int64 {
+       // amd64/v3:"ANDNQ"
+       return x &^ y
+}
+
+// andn32 returns x with every bit that is set in y cleared (x &^ y).
+// The asmcheck directive in the body expects an ANDNL instruction when
+// compiling for amd64/v3.
+func andn32(x, y int32) int32 {
+       // amd64/v3:"ANDNL"
+       return x &^ y
+}
+
+// blsi64 returns x & -x, which keeps only the lowest set bit of x.
+// The asmcheck directive in the body expects a BLSIQ instruction when
+// compiling for amd64/v3.
+func blsi64(x int64) int64 {
+       // amd64/v3:"BLSIQ"
+       return x & -x
+}
+
+// blsi32 returns x & -x, which keeps only the lowest set bit of x.
+// The asmcheck directive in the body expects a BLSIL instruction when
+// compiling for amd64/v3.
+func blsi32(x int32) int32 {
+       // amd64/v3:"BLSIL"
+       return x & -x
+}
+
+// blsmsk64 returns x ^ (x - 1); for nonzero x this is a mask of the lowest
+// set bit together with all bits below it.
+// The asmcheck directive in the body expects a BLSMSKQ instruction when
+// compiling for amd64/v3.
+func blsmsk64(x int64) int64 {
+       // amd64/v3:"BLSMSKQ"
+       return x ^ (x - 1)
+}
+
+// blsmsk32 returns x ^ (x - 1); for nonzero x this is a mask of the lowest
+// set bit together with all bits below it.
+// The asmcheck directive in the body expects a BLSMSKL instruction when
+// compiling for amd64/v3.
+func blsmsk32(x int32) int32 {
+       // amd64/v3:"BLSMSKL"
+       return x ^ (x - 1)
+}
+
+// blsr64 returns x & (x - 1), which clears the lowest set bit of x.
+// The asmcheck directive in the body expects a BLSRQ instruction when
+// compiling for amd64/v3.
+func blsr64(x int64) int64 {
+       // amd64/v3:"BLSRQ"
+       return x & (x - 1)
+}
+
+// blsr32 returns x & (x - 1), which clears the lowest set bit of x.
+// The asmcheck directive in the body expects a BLSRL instruction when
+// compiling for amd64/v3.
+func blsr32(x int32) int32 {
+       // amd64/v3:"BLSRL"
+       return x & (x - 1)
+}