Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: on AMD64, prefer XOR/AND for (x & 1) == 0 check
author Jakub Ciolek <jakub@ciolek.dev>
Tue, 7 Jan 2025 06:00:24 +0000 (07:00 +0100)
committer Gopher Robot <gobot@golang.org>
Mon, 3 Feb 2025 16:42:01 +0000 (08:42 -0800)
It's shorter to encode. Additionally, XOR and AND generally
have higher throughput than BT/SET*.

compilecmp:

runtime
runtime.(*sweepClass).split 58 -> 56  (-3.45%)
runtime.sweepClass.split 14 -> 11  (-21.43%)

runtime [cmd/compile]
runtime.(*sweepClass).split 58 -> 56  (-3.45%)
runtime.sweepClass.split 14 -> 11  (-21.43%)

strconv
strconv.ryuFtoaShortest changed

strconv [cmd/compile]
strconv.ryuFtoaShortest changed

math/big
math/big.(*Int).MulRange 255 -> 252  (-1.18%)

testing/quick
testing/quick.sizedValue changed

internal/fuzz
internal/fuzz.(*pcgRand).bool 69 -> 70  (+1.45%)

cmd/internal/obj/x86
cmd/internal/obj/x86.(*AsmBuf).asmevex changed

math/big [cmd/compile]
math/big.(*Int).MulRange 255 -> 252  (-1.18%)

cmd/internal/obj/x86 [cmd/compile]
cmd/internal/obj/x86.(*AsmBuf).asmevex changed

net/http
net/http.(*http2stream).isPushed 11 -> 10  (-9.09%)

cmd/vendor/github.com/google/pprof/internal/binutils
cmd/vendor/github.com/google/pprof/internal/binutils.(*file).computeBase changed

Change-Id: I9cb2987eb263c85ee4e93d6f8455c91a55273173
Reviewed-on: https://go-review.googlesource.com/c/go/+/640975
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/ssa/_gen/AMD64.rules
src/cmd/compile/internal/ssa/rewriteAMD64.go
test/codegen/bool.go

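diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index ce9a6e99140a29e4b991653c46cdef21cc4ef78a..716f4f1c32d15303de4664b402913e6e9c26a181 100644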
 // x & 1 != 0 -> x & 1
 (SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x)
 (SETB (BT(L|Q)const [0] x)) => (AND(L|Q)const [1] x)
+// x & 1 == 0 -> (x & 1) ^ 1
+(SETAE (BT(L|Q)const [0] x)) => (XORLconst [1] (ANDLconst <typ.Bool> [1] x))
 
 // Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
 // Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index f17c4be516077523df7ab1cda771944574cc2122..d62f38f0e2f59493ed9fdde89afd84b6d3ca1606 100644
@@ -16503,6 +16503,8 @@ func rewriteValueAMD64_OpAMD64SETA(v *Value) bool {
 }
 func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool {
        v_0 := v.Args[0]
+       b := v.Block
+       typ := &b.Func.Config.Types
        // match: (SETAE (TESTQ x x))
        // result: (ConstBool [true])
        for {
@@ -16559,6 +16561,36 @@ func rewriteValueAMD64_OpAMD64SETAE(v *Value) bool {
                v.AuxInt = boolToAuxInt(true)
                return true
        }
+       // match: (SETAE (BTLconst [0] x))
+       // result: (XORLconst [1] (ANDLconst <typ.Bool> [1] x))
+       for {
+               if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = int32ToAuxInt(1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool)
+               v0.AuxInt = int32ToAuxInt(1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
+       // match: (SETAE (BTQconst [0] x))
+       // result: (XORLconst [1] (ANDLconst <typ.Bool> [1] x))
+       for {
+               if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 {
+                       break
+               }
+               x := v_0.Args[0]
+               v.reset(OpAMD64XORLconst)
+               v.AuxInt = int32ToAuxInt(1)
+               v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool)
+               v0.AuxInt = int32ToAuxInt(1)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (SETAE (InvertFlags x))
        // result: (SETBE x)
        for {
diff --git a/test/codegen/bool.go b/test/codegen/bool.go
index 164ca1b2246aa37522a6b23d50e67a8903179128..2024759a5c530f7d7296648de9d58432dc7960f8 100644
@@ -47,6 +47,7 @@ func convertNeqBool32(x uint32) bool {
 
 func convertEqBool32(x uint32) bool {
        // ppc64x:"RLDICL",-"CMPW","XOR",-"ISEL"
+       // amd64:"ANDL","XORL",-"BTL",-"SETCC"
        return x&1 == 0
 }
 
@@ -57,6 +58,7 @@ func convertNeqBool64(x uint64) bool {
 
 func convertEqBool64(x uint64) bool {
        // ppc64x:"RLDICL","XOR",-"CMP",-"ISEL"
+       // amd64:"ANDL","XORL",-"BTL",-"SETCC"
        return x&1 == 0
 }