Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: on amd64, use 32-bit copies for 64-bit copies of 32-bit values
author Jorropo <jorropo.pgm@gmail.com>
Sat, 20 Dec 2025 13:11:35 +0000 (14:11 +0100)
committer Gopher Robot <gobot@golang.org>
Fri, 23 Jan 2026 20:24:56 +0000 (12:24 -0800)
Fixes #76449

This saves a single byte for the REX prefix per OpCopy it triggers on.
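
For illustration (register choice is arbitrary, not taken from the CL), a 64-bit
register-to-register copy and the 32-bit move that can replace it encode as:

    MOVQ AX, CX   // 48 89 c1: REX.W prefix + opcode + ModRM (3 bytes)
    MOVL AX, CX   // 89 c1: opcode + ModRM (2 bytes); writing ECX also zeroes the upper 32 bits of RCX

When ZeroUpper32Bits reports that the source already has zero upper 32 bits,
the shorter MOVL produces the same result.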

Change-Id: I1eab364d07354555ba2f23ffd2f9c522d4a04bd0
Reviewed-on: https://go-review.googlesource.com/c/go/+/731640
Reviewed-by: Michael Pratt <mpratt@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Jorropo <jorropo.pgm@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules
src/cmd/compile/internal/ssa/_gen/ARM64latelower.rules
src/cmd/compile/internal/ssa/rewrite.go
src/cmd/compile/internal/ssa/rewriteAMD64latelower.go
src/cmd/compile/internal/ssa/rewriteARM64latelower.go
test/codegen/constants.go

index e9a566d759dc0cdfcef750e6bb9bd9236662b235..381a91e2280d5adb593fa3a066b05eda083fcc5d 100644 (file)
@@ -43,6 +43,10 @@ func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
        }
 }
 
+func isGPReg(r int16) bool {
+       return x86.REG_AL <= r && r <= x86.REG_R15
+}
+
 func isFPReg(r int16) bool {
        return x86.REG_X0 <= r && r <= x86.REG_Z31
 }
@@ -1225,14 +1229,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                if v.Type.IsMemory() {
                        return
                }
-               x := v.Args[0].Reg()
+               arg := v.Args[0]
+               x := arg.Reg()
                y := v.Reg()
                if v.Type.IsSIMD() {
-                       x = simdOrMaskReg(v.Args[0])
+                       x = simdOrMaskReg(arg)
                        y = simdOrMaskReg(v)
                }
                if x != y {
-                       opregreg(s, moveByRegsWidth(y, x, v.Type.Size()), y, x)
+                       width := v.Type.Size()
+                       if width == 8 && isGPReg(y) && ssa.ZeroUpper32Bits(arg, 3) {
+                               // The source was naturally zext-ed from 32 to 64 bits,
+                               // but we are asked to do a full 64-bit copy.
+                               // Save the REX prefix byte in I-CACHE by using a 32-bit move,
+                               // since it zeroes the upper 32 bits anyway.
+                               width = 4
+                       }
+                       opregreg(s, moveByRegsWidth(y, x, width), y, x)
                }
        case ssa.OpLoadReg:
                if v.Type.IsFlags() {
index ead4ec45f1e28f7bc64e500b0b3a6fc95d8cb19e..9bdb5f8d803bdbf13b6b3c037a98935222f40469 100644 (file)
@@ -8,6 +8,6 @@
 (SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
 
 // See comments in ARM64latelower.rules for why these are here.
-(MOVLQZX x) && zeroUpper32Bits(x,3) => x
-(MOVWQZX x) && zeroUpper48Bits(x,3) => x
-(MOVBQZX x) && zeroUpper56Bits(x,3) => x
+(MOVLQZX x) && ZeroUpper32Bits(x,3) => x
+(MOVWQZX x) && ZeroUpper48Bits(x,3) => x
+(MOVBQZX x) && ZeroUpper56Bits(x,3) => x
index 8c43b960b9d96423ff9e7617f9fc72da1b6684fc..7945a5454dc578f613b8fd9feb7053341d6b3136 100644 (file)
@@ -29,7 +29,7 @@
 (MOVBUreg x:((Equal|NotEqual|LessThan|LessThanU|LessThanF|LessEqual|LessEqualU|LessEqualF|GreaterThan|GreaterThanU|GreaterThanF|GreaterEqual|GreaterEqualU|GreaterEqualF) _)) => x
 
 // omit unsigned extension
-(MOVWUreg x) && zeroUpper32Bits(x, 3) => x
+(MOVWUreg x) && ZeroUpper32Bits(x, 3) => x
 
 // don't extend after proper load
 (MOVBreg  x:(MOVBload  _ _)) => (MOVDreg x)
index b4e1a7fd334699478143db79d76c401a256205e4..4b13d65618a7d3f0825c77c740b9724aaba50306 100644 (file)
@@ -1351,7 +1351,7 @@ func overlap(offset1, size1, offset2, size2 int64) bool {
 // check if value zeroes out upper 32-bit of 64-bit register.
 // depth limits recursion depth. In AMD64.rules 3 is used as limit,
 // because it catches same amount of cases as 4.
-func zeroUpper32Bits(x *Value, depth int) bool {
+func ZeroUpper32Bits(x *Value, depth int) bool {
        if x.Type.IsSigned() && x.Type.Size() < 8 {
                // If the value is signed, it might get re-sign-extended
                // during spill and restore. See issue 68227.
@@ -1368,6 +1368,8 @@ func zeroUpper32Bits(x *Value, depth int) bool {
                OpAMD64SHRL, OpAMD64SHRLconst, OpAMD64SARL, OpAMD64SARLconst,
                OpAMD64SHLL, OpAMD64SHLLconst:
                return true
+       case OpAMD64MOVQconst:
+               return uint64(uint32(x.AuxInt)) == uint64(x.AuxInt)
        case OpARM64REV16W, OpARM64REVW, OpARM64RBITW, OpARM64CLZW, OpARM64EXTRWconst,
                OpARM64MULW, OpARM64MNEGW, OpARM64UDIVW, OpARM64DIVW, OpARM64UMODW,
                OpARM64MADDW, OpARM64MSUBW, OpARM64RORW, OpARM64RORWconst:
@@ -1383,7 +1385,7 @@ func zeroUpper32Bits(x *Value, depth int) bool {
                        return false
                }
                for i := range x.Args {
-                       if !zeroUpper32Bits(x.Args[i], depth-1) {
+                       if !ZeroUpper32Bits(x.Args[i], depth-1) {
                                return false
                        }
                }
@@ -1393,14 +1395,16 @@ func zeroUpper32Bits(x *Value, depth int) bool {
        return false
 }
 
-// zeroUpper48Bits is similar to zeroUpper32Bits, but for upper 48 bits.
-func zeroUpper48Bits(x *Value, depth int) bool {
+// ZeroUpper48Bits is similar to ZeroUpper32Bits, but for upper 48 bits.
+func ZeroUpper48Bits(x *Value, depth int) bool {
        if x.Type.IsSigned() && x.Type.Size() < 8 {
                return false
        }
        switch x.Op {
        case OpAMD64MOVWQZX, OpAMD64MOVWload, OpAMD64MOVWloadidx1, OpAMD64MOVWloadidx2:
                return true
+       case OpAMD64MOVQconst, OpAMD64MOVLconst:
+               return uint64(uint16(x.AuxInt)) == uint64(x.AuxInt)
        case OpArg: // note: but not ArgIntReg
                return x.Type.Size() == 2 && x.Block.Func.Config.arch == "amd64"
        case OpPhi, OpSelect0, OpSelect1:
@@ -1410,7 +1414,7 @@ func zeroUpper48Bits(x *Value, depth int) bool {
                        return false
                }
                for i := range x.Args {
-                       if !zeroUpper48Bits(x.Args[i], depth-1) {
+                       if !ZeroUpper48Bits(x.Args[i], depth-1) {
                                return false
                        }
                }
@@ -1420,14 +1424,16 @@ func zeroUpper48Bits(x *Value, depth int) bool {
        return false
 }
 
-// zeroUpper56Bits is similar to zeroUpper32Bits, but for upper 56 bits.
-func zeroUpper56Bits(x *Value, depth int) bool {
+// ZeroUpper56Bits is similar to ZeroUpper32Bits, but for upper 56 bits.
+func ZeroUpper56Bits(x *Value, depth int) bool {
        if x.Type.IsSigned() && x.Type.Size() < 8 {
                return false
        }
        switch x.Op {
        case OpAMD64MOVBQZX, OpAMD64MOVBload, OpAMD64MOVBloadidx1:
                return true
+       case OpAMD64MOVQconst, OpAMD64MOVLconst:
+               return uint64(uint8(x.AuxInt)) == uint64(x.AuxInt)
        case OpArg: // note: but not ArgIntReg
                return x.Type.Size() == 1 && x.Block.Func.Config.arch == "amd64"
        case OpPhi, OpSelect0, OpSelect1:
@@ -1437,7 +1443,7 @@ func zeroUpper56Bits(x *Value, depth int) bool {
                        return false
                }
                for i := range x.Args {
-                       if !zeroUpper56Bits(x.Args[i], depth-1) {
+                       if !ZeroUpper56Bits(x.Args[i], depth-1) {
                                return false
                        }
                }
index 11ecb0b285a22cb6effe8df9188d4374546e48aa..531fbe1dd0117f3133315c6249ac25b153db3cb1 100644 (file)
@@ -30,11 +30,11 @@ func rewriteValueAMD64latelower(v *Value) bool {
 func rewriteValueAMD64latelower_OpAMD64MOVBQZX(v *Value) bool {
        v_0 := v.Args[0]
        // match: (MOVBQZX x)
-       // cond: zeroUpper56Bits(x,3)
+       // cond: ZeroUpper56Bits(x,3)
        // result: x
        for {
                x := v_0
-               if !(zeroUpper56Bits(x, 3)) {
+               if !(ZeroUpper56Bits(x, 3)) {
                        break
                }
                v.copyOf(x)
@@ -45,11 +45,11 @@ func rewriteValueAMD64latelower_OpAMD64MOVBQZX(v *Value) bool {
 func rewriteValueAMD64latelower_OpAMD64MOVLQZX(v *Value) bool {
        v_0 := v.Args[0]
        // match: (MOVLQZX x)
-       // cond: zeroUpper32Bits(x,3)
+       // cond: ZeroUpper32Bits(x,3)
        // result: x
        for {
                x := v_0
-               if !(zeroUpper32Bits(x, 3)) {
+               if !(ZeroUpper32Bits(x, 3)) {
                        break
                }
                v.copyOf(x)
@@ -60,11 +60,11 @@ func rewriteValueAMD64latelower_OpAMD64MOVLQZX(v *Value) bool {
 func rewriteValueAMD64latelower_OpAMD64MOVWQZX(v *Value) bool {
        v_0 := v.Args[0]
        // match: (MOVWQZX x)
-       // cond: zeroUpper48Bits(x,3)
+       // cond: ZeroUpper48Bits(x,3)
        // result: x
        for {
                x := v_0
-               if !(zeroUpper48Bits(x, 3)) {
+               if !(ZeroUpper48Bits(x, 3)) {
                        break
                }
                v.copyOf(x)
index 0fa5e26e93d0f97fbb864b2ca083bf7f786fa550..43ddb34b30dcb217b1582797f95faef357019b89 100644 (file)
@@ -653,11 +653,11 @@ func rewriteValueARM64latelower_OpARM64MOVHreg(v *Value) bool {
 func rewriteValueARM64latelower_OpARM64MOVWUreg(v *Value) bool {
        v_0 := v.Args[0]
        // match: (MOVWUreg x)
-       // cond: zeroUpper32Bits(x, 3)
+       // cond: ZeroUpper32Bits(x, 3)
        // result: x
        for {
                x := v_0
-               if !(zeroUpper32Bits(x, 3)) {
+               if !(ZeroUpper32Bits(x, 3)) {
                        break
                }
                v.copyOf(x)
index 178a106552a1f82322e892b117bf5929f1ad4d00..9b014b54b1e7d7e45ba48a9c738a98f2931f3569 100644 (file)
@@ -33,3 +33,12 @@ func contiguousMaskConstants() (out [64]uint64) {
        out[3] = 0xFFFFFFFE00000001
        return
 }
+
+func issue76449_1() (_, _, _ uint64) {
+       // amd64:-"MOVQ"
+       return 0, 0, 0
+}
+func issue76449_2() (_, _, _ uint64) {
+       // amd64:-"MOVQ"
+       return 1, 2, 1
+}