From: Jakub Ciolek Date: Tue, 7 Jan 2025 06:00:24 +0000 (+0100) Subject: cmd/compile: on AMD64, prefer XOR/AND for (x & 1) == 0 check X-Git-Tag: go1.25rc1~1200 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=e57769d5ad251a26d7676cd7a63e5332b26a35f0;p=gostls13.git cmd/compile: on AMD64, prefer XOR/AND for (x & 1) == 0 check It's shorter to encode. Additionally, XOR and AND generally have higher throughput than BT/SET*. compilecmp: runtime runtime.(*sweepClass).split 58 -> 56 (-3.45%) runtime.sweepClass.split 14 -> 11 (-21.43%) runtime [cmd/compile] runtime.(*sweepClass).split 58 -> 56 (-3.45%) runtime.sweepClass.split 14 -> 11 (-21.43%) strconv strconv.ryuFtoaShortest changed strconv [cmd/compile] strconv.ryuFtoaShortest changed math/big math/big.(*Int).MulRange 255 -> 252 (-1.18%) testing/quick testing/quick.sizedValue changed internal/fuzz internal/fuzz.(*pcgRand).bool 69 -> 70 (+1.45%) cmd/internal/obj/x86 cmd/internal/obj/x86.(*AsmBuf).asmevex changed math/big [cmd/compile] math/big.(*Int).MulRange 255 -> 252 (-1.18%) cmd/internal/obj/x86 [cmd/compile] cmd/internal/obj/x86.(*AsmBuf).asmevex changed net/http net/http.(*http2stream).isPushed 11 -> 10 (-9.09%) cmd/vendor/github.com/google/pprof/internal/binutils cmd/vendor/github.com/google/pprof/internal/binutils.(*file).computeBase changed Change-Id: I9cb2987eb263c85ee4e93d6f8455c91a55273173 Reviewed-on: https://go-review.googlesource.com/c/go/+/640975 Reviewed-by: Cherry Mui Reviewed-by: Keith Randall Reviewed-by: Keith Randall Auto-Submit: Keith Randall LUCI-TryBot-Result: Go LUCI --- diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules index ce9a6e9914..716f4f1c32 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules @@ -629,6 +629,8 @@ // x & 1 != 0 -> x & 1 (SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x) (SETB (BT(L|Q)const [0] x)) => (AND(L|Q)const [1] x) +// x & 1 == 0 -> (x & 1) ^ 1 +(SETAE (BT(L|Q)const [0] x)) => (XORLconst [1] (ANDLconst [1] x)) // Recognize bit tests: a&(1< [1] x)) + for { + if v_0.Op != OpAMD64BTLconst || auxIntToInt8(v_0.AuxInt) != 0 { + break + } + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool) + v0.AuxInt = int32ToAuxInt(1) + v0.AddArg(x) + v.AddArg(v0) + return true + } + // match: (SETAE (BTQconst [0] x)) + // result: (XORLconst [1] (ANDLconst [1] x)) + for { + if v_0.Op != OpAMD64BTQconst || auxIntToInt8(v_0.AuxInt) != 0 { + break + } + x := v_0.Args[0] + v.reset(OpAMD64XORLconst) + v.AuxInt = int32ToAuxInt(1) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, typ.Bool) + v0.AuxInt = int32ToAuxInt(1) + v0.AddArg(x) + v.AddArg(v0) + return true + } // match: (SETAE (InvertFlags x)) // result: (SETBE x) for { diff --git a/test/codegen/bool.go b/test/codegen/bool.go index 164ca1b224..2024759a5c 100644 --- a/test/codegen/bool.go +++ b/test/codegen/bool.go @@ -47,6 +47,7 @@ func convertNeqBool32(x uint32) bool { func convertEqBool32(x uint32) bool { // ppc64x:"RLDICL",-"CMPW","XOR",-"ISEL" + // amd64:"ANDL","XORL",-"BTL",-"SETCC" return x&1 == 0 } @@ -57,6 +58,7 @@ func convertNeqBool64(x uint64) bool { func convertEqBool64(x uint64) bool { // ppc64x:"RLDICL","XOR",-"CMP",-"ISEL" + // amd64:"ANDL","XORL",-"BTL",-"SETCC" return x&1 == 0 }