According to "Intel 64 and IA-32 Architectures Optimization Reference
Manual" Section: "3.5.1.13 Zero-Latency MOV Instructions"
MOV?ZX instructions have zero latency on newer processors.
during make.bash:
(ANDLconst [0xFF] x) -> (MOVBQZX x)
applies 422 times
(ANDLconst [0xFFFF] x) -> (MOVWQZX x)
applies 114 times
Updates #15105
Change-Id: I10933af599de3c26449c52f4b5cd859331028f39
Reviewed-on: https://go-review.googlesource.com/31639
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: David Chase <drchase@google.com>
(CMPB x (MOVLconst [c])) -> (CMPBconst x [int64(int8(c))])
(CMPB (MOVLconst [c]) x) -> (InvertFlags (CMPBconst x [int64(int8(c))]))
-// Using MOVBQZX instead of ANDQ is cheaper.
+// Using MOVZX instead of AND is cheaper.
+(ANDLconst [0xFF] x) -> (MOVBQZX x)
+(ANDLconst [0xFFFF] x) -> (MOVWQZX x)
(ANDQconst [0xFF] x) -> (MOVBQZX x)
(ANDQconst [0xFFFF] x) -> (MOVWQZX x)
(ANDQconst [0xFFFFFFFF] x) -> (MOVLQZX x)
v.AddArg(x)
return true
}
+ // match: (ANDLconst [0xFF] x)
+ // cond:
+ // result: (MOVBQZX x)
+ for {
+ if v.AuxInt != 0xFF {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64MOVBQZX)
+ v.AddArg(x)
+ return true
+ }
+ // match: (ANDLconst [0xFFFF] x)
+ // cond:
+ // result: (MOVWQZX x)
+ for {
+ if v.AuxInt != 0xFFFF {
+ break
+ }
+ x := v.Args[0]
+ v.reset(OpAMD64MOVWQZX)
+ v.AddArg(x)
+ return true
+ }
// match: (ANDLconst [c] _)
// cond: int32(c)==0
// result: (MOVLconst [0])