From 04945edd40fff4d66321a4f98c1bb070b6356008 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Alexandru=20Mo=C8=99oi?=
Date: Mon, 4 Apr 2016 19:23:41 +0200
Subject: [PATCH] cmd/compile: replaces ANDQ with MOV?ZX
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Where possible replace ANDQ with MOV?ZX. Care is taken not to regress
with respect to bounds checking: the bounds check in, for example,
[1000]int{}[i&255] must still be eliminated, so matching CMPQconst
rules are added for the new MOV?QZX ops.

According to the "Intel 64 and IA-32 Architectures Optimization
Reference Manual", Section 3.5.1.13 "Zero-Latency MOV Instructions",
MOV?ZX instructions have zero latency on newer processors.

Updates #15105

Change-Id: I63539fdbc5812d5563aa1ebc49eca035bd307997
Reviewed-on: https://go-review.googlesource.com/21508
Reviewed-by: Айнар Гарипов
Reviewed-by: David Chase
---
 src/cmd/compile/internal/ssa/gen/AMD64.rules |  8 ++
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 81 ++++++++++++++++++++
 2 files changed, 89 insertions(+)

diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 4ad0f883b0..b37720eb39 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -587,6 +587,11 @@
 (CMPB x (MOVBconst [c])) -> (CMPBconst x [c])
 (CMPB (MOVBconst [c]) x) -> (InvertFlags (CMPBconst x [c]))
 
+// Using MOVBQZX instead of ANDQ is cheaper.
+(ANDQconst [0xFF] x) -> (MOVBQZX x)
+(ANDQconst [0xFFFF] x) -> (MOVWQZX x)
+(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
+
 // strength reduction
 // Assumes that the following costs from https://gmplib.org/~tege/x86-timing.pdf:
 // 1 - addq, shlq, leaq, negq
@@ -1093,6 +1098,9 @@
 (CMPBconst (MOVBconst [x]) [y]) && int8(x)>int8(y) && uint8(x)>uint8(y) -> (FlagGT_UGT)
 
 // Other known comparisons.
+(CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT)
+(CMPQconst (MOVWQZX _) [c]) && 0xFFFF < c -> (FlagLT_ULT)
+(CMPQconst (MOVLQZX _) [c]) && 0xFFFFFFFF < c -> (FlagLT_ULT)
 (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n -> (FlagLT_ULT)
 (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= int32(m) && int32(m) < int32(n) -> (FlagLT_ULT)
 (CMPWconst (ANDWconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < int16(n) -> (FlagLT_ULT)
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 11c2de391c..a1d1e4edd9 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -1838,6 +1838,42 @@ func rewriteValueAMD64_OpAMD64ANDQconst(v *Value, config *Config) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (ANDQconst [0xFF] x)
+	// cond:
+	// result: (MOVBQZX x)
+	for {
+		if v.AuxInt != 0xFF {
+			break
+		}
+		x := v.Args[0]
+		v.reset(OpAMD64MOVBQZX)
+		v.AddArg(x)
+		return true
+	}
+	// match: (ANDQconst [0xFFFF] x)
+	// cond:
+	// result: (MOVWQZX x)
+	for {
+		if v.AuxInt != 0xFFFF {
+			break
+		}
+		x := v.Args[0]
+		v.reset(OpAMD64MOVWQZX)
+		v.AddArg(x)
+		return true
+	}
+	// match: (ANDQconst [0xFFFFFFFF] x)
+	// cond:
+	// result: (MOVLQZX x)
+	for {
+		if v.AuxInt != 0xFFFFFFFF {
+			break
+		}
+		x := v.Args[0]
+		v.reset(OpAMD64MOVLQZX)
+		v.AddArg(x)
+		return true
+	}
 	// match: (ANDQconst [0] _)
 	// cond:
 	// result: (MOVQconst [0])
@@ -3026,6 +3062,51 @@ func rewriteValueAMD64_OpAMD64CMPQconst(v *Value, config *Config) bool {
 		v.reset(OpAMD64FlagGT_UGT)
 		return true
 	}
+	// match: (CMPQconst (MOVBQZX _) [c])
+	// cond: 0xFF < c
+	// result: (FlagLT_ULT)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64MOVBQZX {
+			break
+		}
+		c := v.AuxInt
+		if !(0xFF < c) {
+			break
+		}
+		v.reset(OpAMD64FlagLT_ULT)
+		return true
+	}
+	// match: (CMPQconst (MOVWQZX _) [c])
+	// cond: 0xFFFF < c
+	// result: (FlagLT_ULT)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64MOVWQZX {
+			break
+		}
+		c := v.AuxInt
+		if !(0xFFFF < c) {
+			break
+		}
+		v.reset(OpAMD64FlagLT_ULT)
+		return true
+	}
+	// match: (CMPQconst (MOVLQZX _) [c])
+	// cond: 0xFFFFFFFF < c
+	// result: (FlagLT_ULT)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpAMD64MOVLQZX {
+			break
+		}
+		c := v.AuxInt
+		if !(0xFFFFFFFF < c) {
+			break
+		}
+		v.reset(OpAMD64FlagLT_ULT)
+		return true
+	}
 	// match: (CMPQconst (ANDQconst _ [m]) [n])
 	// cond: 0 <= m && m < n
 	// result: (FlagLT_ULT)
-- 
2.48.1
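
A minimal Go sketch of the kind of source these rules target (an
illustration only, not part of the CL; the function names mask8 and
index8 are made up). mask8 shows the plain masking case that can now be
lowered to MOVBQZX instead of an ANDQ, and index8 is the
[1000]int{}[i&255] case from the commit message, where the new
CMPQconst rules keep the bounds check provably redundant.

	package main

	import "fmt"

	// mask8 keeps the low 8 bits of x. With this CL, the x & 0xFF can be
	// lowered on AMD64 to MOVBQZX instead of an ANDQ.
	func mask8(x uint64) uint64 {
		return x & 0xFF
	}

	// index8 is the commit message's example: i&255 is always in [0, 255],
	// so the bounds check against the array length 1000 is redundant. The
	// new (CMPQconst (MOVBQZX _) [c]) && 0xFF < c -> (FlagLT_ULT) rule
	// keeps that provable after ANDQ is rewritten to MOVBQZX.
	func index8(a *[1000]int, i int) int {
		return a[i&255]
	}

	func main() {
		var a [1000]int
		a[42] = 7
		fmt.Println(mask8(0x1234), index8(&a, 298)) // 52 (0x34) and 7, since 298&255 == 42
	}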
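
The three ANDQconst rewrites are sound because masking with 0xFF,
0xFFFF or 0xFFFFFFFF is exactly zero extension from 8, 16 or 32 bits.
A small self-contained Go check of that equivalence (illustrative only;
maskAnd and the zext* helpers are invented names, not compiler code):

	package main

	import "fmt"

	// maskAnd mimics ANDQconst: keep only the bits selected by mask.
	func maskAnd(x, mask uint64) uint64 { return x & mask }

	// zext8, zext16, zext32 mimic MOVBQZX, MOVWQZX and MOVLQZX:
	// truncate to 8/16/32 bits and zero-extend back to 64 bits.
	func zext8(x uint64) uint64  { return uint64(uint8(x)) }
	func zext16(x uint64) uint64 { return uint64(uint16(x)) }
	func zext32(x uint64) uint64 { return uint64(uint32(x)) }

	func main() {
		inputs := []uint64{0, 1, 0xFF, 0x100, 0x123456789ABCDEF0, 0xFFFFFFFFFFFFFFFF}
		for _, x := range inputs {
			// Each comparison prints true for every x: the AND is a pure zero extension.
			fmt.Println(maskAnd(x, 0xFF) == zext8(x),
				maskAnd(x, 0xFFFF) == zext16(x),
				maskAnd(x, 0xFFFFFFFF) == zext32(x))
		}
	}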