From: Michael Munday <mike.munday@ibm.com>
Date: Mon, 26 Mar 2018 16:00:29 +0000 (+0100)
Subject: cmd/compile: use 32-bit comparisons where possible on s390x
X-Git-Tag: go1.11beta1~1104
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8afa8a3374a926eda39e1449842a1c70b1fac37f;p=gostls13.git

cmd/compile: use 32-bit comparisons where possible on s390x

We use 32-bit operations for 8- and 16-bit arithmetic, so use them
for comparisons too. This won't change performance but it is more
consistent and makes testing 8- and 16-bit comparison codegen
slightly more straightforward (for follow up CL).

Also fix a typo and add some additional double sign and zero
extension rules to remove the operations inserted by the comparison
rules.

Change-Id: I89ec1b0e09cb8be8090cf007be283ad88bba75a4
Reviewed-on: https://go-review.googlesource.com/102556
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
---

diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 8799c716ef..280e85004c 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -223,69 +223,55 @@
 (Rsh8x8  <t> x y)  -> (SRAW <t> (MOVBreg x) (ORW <y.Type> y (NOTW <y.Type> (SUBEWcarrymask <y.Type> (CMPWUconst (MOVBZreg y) [7])))))
 
 // Lowering comparisons
-(Less64  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Less32  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
-(Less16  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
-(Less8   x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(Less64U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
-(Less32U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
-(Less16U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
-(Less8U  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+(Less64      x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
+(Less32      x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
+(Less(16|8)  x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
+(Less64U     x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
+(Less32U     x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
+(Less(16|8)U x y) -> (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
 // Use SETG with reversed operands to dodge NaN case.
 (Less64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
 (Less32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))
 
-(Leq64  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Leq32  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
-(Leq16  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
-(Leq8   x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(Leq64U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
-(Leq32U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
-(Leq16U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
-(Leq8U  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+(Leq64      x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
+(Leq32      x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
+(Leq(16|8)  x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
+(Leq64U     x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
+(Leq32U     x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
+(Leq(16|8)U x y) -> (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
 // Use SETGE with reversed operands to dodge NaN case.
 (Leq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP y x))
 (Leq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS y x))
 
-(Greater64  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Greater32  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
-(Greater16  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
-(Greater8   x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(Greater64U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
-(Greater32U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
-(Greater16U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
-(Greater8U  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+(Greater64      x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
+(Greater32      x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
+(Greater(16|8)  x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
+(Greater64U     x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
+(Greater32U     x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
+(Greater(16|8)U x y) -> (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
 (Greater64F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
 (Greater32F x y) -> (MOVDGTnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
 
-(Geq64  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Geq32  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
-(Geq16  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
-(Geq8   x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(Geq64U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
-(Geq32U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
-(Geq16U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
-(Geq8U  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+(Geq64      x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
+(Geq32      x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
+(Geq(16|8)  x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B)reg x) (MOV(H|B)reg y)))
+(Geq64U     x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU x y))
+(Geq32U     x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU x y))
+(Geq(16|8)U x y) -> (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOV(H|B)Zreg x) (MOV(H|B)Zreg y)))
 (Geq64F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
 (Geq32F x y) -> (MOVDGEnoinv (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
 
-(Eq64  x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Eq32  x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
-(Eq16  x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
-(Eq8   x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(EqB   x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(EqPtr x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Eq64F x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
-(Eq32F x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
-
-(Neq64  x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Neq32  x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
-(Neq16  x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
-(Neq8   x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(NeqB   x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
-(NeqPtr x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
-(Neq64F x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
-(Neq32F x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
+(Eq(64|Ptr) x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
+(Eq32       x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
+(Eq(16|8|B) x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
+(Eq64F      x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
+(Eq32F      x y) -> (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
+
+(Neq(64|Ptr) x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP x y))
+(Neq32       x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW x y))
+(Neq(16|8|B) x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOV(H|B|B)reg x) (MOV(H|B|B)reg y)))
+(Neq64F      x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMP x y))
+(Neq32F      x y) -> (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (FCMPS x y))
 
 // Lowering loads
 (Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVDload ptr mem)
@@ -625,12 +611,19 @@
 (MOVWreg x:(MOVBreg _)) -> (MOVDreg x)
 (MOVWreg x:(MOVBZreg _)) -> (MOVDreg x)
 (MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
-(MOVWreg x:(MOVHreg _)) -> (MOVDreg x)
+(MOVWreg x:(MOVHZreg _)) -> (MOVDreg x)
 (MOVWreg x:(MOVWreg _)) -> (MOVDreg x)
 (MOVWZreg x:(MOVBZreg _)) -> (MOVDreg x)
 (MOVWZreg x:(MOVHZreg _)) -> (MOVDreg x)
 (MOVWZreg x:(MOVWZreg _)) -> (MOVDreg x)
 
+(MOVBreg (MOVBZreg x)) -> (MOVBreg x)
+(MOVBZreg (MOVBreg x)) -> (MOVBZreg x)
+(MOVHreg (MOVHZreg x)) -> (MOVHreg x)
+(MOVHZreg (MOVHreg x)) -> (MOVHZreg x)
+(MOVWreg (MOVWZreg x)) -> (MOVWreg x)
+(MOVWZreg (MOVWreg x)) -> (MOVWZreg x)
+
 // fold extensions into constants
 (MOVBreg (MOVDconst [c])) -> (MOVDconst [int64(int8(c))])
 (MOVBZreg (MOVDconst [c])) -> (MOVDconst [int64(uint8(c))])
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index fd70a70952..480512aaaa 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -584,7 +584,7 @@ func rewriteValueS390X(v *Value) bool {
 	case OpS390XMOVHZloadidx:
 		return rewriteValueS390X_OpS390XMOVHZloadidx_0(v)
 	case OpS390XMOVHZreg:
-		return rewriteValueS390X_OpS390XMOVHZreg_0(v)
+		return rewriteValueS390X_OpS390XMOVHZreg_0(v) || rewriteValueS390X_OpS390XMOVHZreg_10(v)
 	case OpS390XMOVHload:
 		return rewriteValueS390X_OpS390XMOVHload_0(v)
 	case OpS390XMOVHloadidx:
@@ -1768,7 +1768,7 @@ func rewriteValueS390X_OpEq16_0(v *Value) bool {
 	_ = typ
 	// match: (Eq16 x y)
 	// cond:
-	// result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
+	// result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVHreg x) (MOVHreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -1780,7 +1780,7 @@ func rewriteValueS390X_OpEq16_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -1902,7 +1902,7 @@ func rewriteValueS390X_OpEq8_0(v *Value) bool {
 	_ = typ
 	// match: (Eq8 x y)
 	// cond:
-	// result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -1914,7 +1914,7 @@ func rewriteValueS390X_OpEq8_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -1932,7 +1932,7 @@ func rewriteValueS390X_OpEqB_0(v *Value) bool {
 	_ = typ
 	// match: (EqB x y)
 	// cond:
-	// result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDEQ (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -1944,7 +1944,7 @@ func rewriteValueS390X_OpEqB_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2000,7 +2000,7 @@ func rewriteValueS390X_OpGeq16_0(v *Value) bool {
 	_ = typ
 	// match: (Geq16 x y)
 	// cond:
-	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
+	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVHreg x) (MOVHreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2012,7 +2012,7 @@ func rewriteValueS390X_OpGeq16_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2030,7 +2030,7 @@ func rewriteValueS390X_OpGeq16U_0(v *Value) bool {
 	_ = typ
 	// match: (Geq16U x y)
 	// cond:
-	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
+	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVHZreg x) (MOVHZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2042,7 +2042,7 @@ func rewriteValueS390X_OpGeq16U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2216,7 +2216,7 @@ func rewriteValueS390X_OpGeq8_0(v *Value) bool {
 	_ = typ
 	// match: (Geq8 x y)
 	// cond:
-	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2228,7 +2228,7 @@ func rewriteValueS390X_OpGeq8_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2246,7 +2246,7 @@ func rewriteValueS390X_OpGeq8U_0(v *Value) bool {
 	_ = typ
 	// match: (Geq8U x y)
 	// cond:
-	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+	// result: (MOVDGE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVBZreg x) (MOVBZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2258,7 +2258,7 @@ func rewriteValueS390X_OpGeq8U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2305,7 +2305,7 @@ func rewriteValueS390X_OpGreater16_0(v *Value) bool {
 	_ = typ
 	// match: (Greater16 x y)
 	// cond:
-	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
+	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVHreg x) (MOVHreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2317,7 +2317,7 @@ func rewriteValueS390X_OpGreater16_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2335,7 +2335,7 @@ func rewriteValueS390X_OpGreater16U_0(v *Value) bool {
 	_ = typ
 	// match: (Greater16U x y)
 	// cond:
-	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
+	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVHZreg x) (MOVHZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2347,7 +2347,7 @@ func rewriteValueS390X_OpGreater16U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2521,7 +2521,7 @@ func rewriteValueS390X_OpGreater8_0(v *Value) bool {
 	_ = typ
 	// match: (Greater8 x y)
 	// cond:
-	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2533,7 +2533,7 @@ func rewriteValueS390X_OpGreater8_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2551,7 +2551,7 @@ func rewriteValueS390X_OpGreater8U_0(v *Value) bool {
 	_ = typ
 	// match: (Greater8U x y)
 	// cond:
-	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+	// result: (MOVDGT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVBZreg x) (MOVBZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2563,7 +2563,7 @@ func rewriteValueS390X_OpGreater8U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2770,7 +2770,7 @@ func rewriteValueS390X_OpLeq16_0(v *Value) bool {
 	_ = typ
 	// match: (Leq16 x y)
 	// cond:
-	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
+	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVHreg x) (MOVHreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2782,7 +2782,7 @@ func rewriteValueS390X_OpLeq16_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2800,7 +2800,7 @@ func rewriteValueS390X_OpLeq16U_0(v *Value) bool {
 	_ = typ
 	// match: (Leq16U x y)
 	// cond:
-	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
+	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVHZreg x) (MOVHZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2812,7 +2812,7 @@ func rewriteValueS390X_OpLeq16U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -2986,7 +2986,7 @@ func rewriteValueS390X_OpLeq8_0(v *Value) bool {
 	_ = typ
 	// match: (Leq8 x y)
 	// cond:
-	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -2998,7 +2998,7 @@ func rewriteValueS390X_OpLeq8_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -3016,7 +3016,7 @@ func rewriteValueS390X_OpLeq8U_0(v *Value) bool {
 	_ = typ
 	// match: (Leq8U x y)
 	// cond:
-	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+	// result: (MOVDLE (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVBZreg x) (MOVBZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -3028,7 +3028,7 @@ func rewriteValueS390X_OpLeq8U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -3046,7 +3046,7 @@ func rewriteValueS390X_OpLess16_0(v *Value) bool {
 	_ = typ
 	// match: (Less16 x y)
 	// cond:
-	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
+	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVHreg x) (MOVHreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -3058,7 +3058,7 @@ func rewriteValueS390X_OpLess16_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -3076,7 +3076,7 @@ func rewriteValueS390X_OpLess16U_0(v *Value) bool {
 	_ = typ
 	// match: (Less16U x y)
 	// cond:
-	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVHZreg x) (MOVHZreg y)))
+	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVHZreg x) (MOVHZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -3088,7 +3088,7 @@ func rewriteValueS390X_OpLess16U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -3262,7 +3262,7 @@ func rewriteValueS390X_OpLess8_0(v *Value) bool {
 	_ = typ
 	// match: (Less8 x y)
 	// cond:
-	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -3274,7 +3274,7 @@ func rewriteValueS390X_OpLess8_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -3292,7 +3292,7 @@ func rewriteValueS390X_OpLess8U_0(v *Value) bool {
 	_ = typ
 	// match: (Less8U x y)
 	// cond:
-	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPU (MOVBZreg x) (MOVBZreg y)))
+	// result: (MOVDLT (MOVDconst [0]) (MOVDconst [1]) (CMPWU (MOVBZreg x) (MOVBZreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -3304,7 +3304,7 @@ func rewriteValueS390X_OpLess8U_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMPU, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPWU, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBZreg, typ.UInt64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -4648,7 +4648,7 @@ func rewriteValueS390X_OpNeq16_0(v *Value) bool {
 	_ = typ
 	// match: (Neq16 x y)
 	// cond:
-	// result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVHreg x) (MOVHreg y)))
+	// result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVHreg x) (MOVHreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -4660,7 +4660,7 @@ func rewriteValueS390X_OpNeq16_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVHreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -4782,7 +4782,7 @@ func rewriteValueS390X_OpNeq8_0(v *Value) bool {
 	_ = typ
 	// match: (Neq8 x y)
 	// cond:
-	// result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -4794,7 +4794,7 @@ func rewriteValueS390X_OpNeq8_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -4812,7 +4812,7 @@ func rewriteValueS390X_OpNeqB_0(v *Value) bool {
 	_ = typ
 	// match: (NeqB x y)
 	// cond:
-	// result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMP (MOVBreg x) (MOVBreg y)))
+	// result: (MOVDNE (MOVDconst [0]) (MOVDconst [1]) (CMPW (MOVBreg x) (MOVBreg y)))
 	for {
 		_ = v.Args[1]
 		x := v.Args[0]
@@ -4824,7 +4824,7 @@ func rewriteValueS390X_OpNeqB_0(v *Value) bool {
 		v1 := b.NewValue0(v.Pos, OpS390XMOVDconst, typ.UInt64)
 		v1.AuxInt = 1
 		v.AddArg(v1)
-		v2 := b.NewValue0(v.Pos, OpS390XCMP, types.TypeFlags)
+		v2 := b.NewValue0(v.Pos, OpS390XCMPW, types.TypeFlags)
 		v3 := b.NewValue0(v.Pos, OpS390XMOVBreg, typ.Int64)
 		v3.AddArg(x)
 		v2.AddArg(v3)
@@ -10002,6 +10002,19 @@ func rewriteValueS390X_OpS390XMOVBZreg_10(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBZreg (MOVBreg x))
+	// cond:
+	// result: (MOVBZreg x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpS390XMOVBreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpS390XMOVBZreg)
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBZreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(uint8(c))])
@@ -10410,6 +10423,19 @@ func rewriteValueS390X_OpS390XMOVBreg_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBreg (MOVBZreg x))
+	// cond:
+	// result: (MOVBreg x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpS390XMOVBZreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpS390XMOVBreg)
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(int8(c))])
@@ -16395,6 +16421,19 @@ func rewriteValueS390X_OpS390XMOVHZreg_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHZreg (MOVHreg x))
+	// cond:
+	// result: (MOVHZreg x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpS390XMOVHreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpS390XMOVHZreg)
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHZreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(uint16(c))])
@@ -16488,6 +16527,11 @@ func rewriteValueS390X_OpS390XMOVHZreg_0(v *Value) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValueS390X_OpS390XMOVHZreg_10(v *Value) bool {
+	b := v.Block
+	_ = b
 	// match: (MOVHZreg x:(MOVHloadidx [off] {sym} ptr idx mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: @x.Block (MOVHZloadidx <v.Type> [off] {sym} ptr idx mem)
@@ -16854,6 +16898,19 @@ func rewriteValueS390X_OpS390XMOVHreg_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHreg (MOVHZreg x))
+	// cond:
+	// result: (MOVHreg x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpS390XMOVHZreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpS390XMOVHreg)
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(int16(c))])
@@ -16893,6 +16950,11 @@ func rewriteValueS390X_OpS390XMOVHreg_0(v *Value) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValueS390X_OpS390XMOVHreg_10(v *Value) bool {
+	b := v.Block
+	_ = b
 	// match: (MOVHreg x:(MOVHload [off] {sym} ptr mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: @x.Block (MOVHload <v.Type> [off] {sym} ptr mem)
@@ -16919,11 +16981,6 @@ func rewriteValueS390X_OpS390XMOVHreg_0(v *Value) bool {
 		v0.AddArg(mem)
 		return true
 	}
-	return false
-}
-func rewriteValueS390X_OpS390XMOVHreg_10(v *Value) bool {
-	b := v.Block
-	_ = b
 	// match: (MOVHreg x:(MOVHZloadidx [off] {sym} ptr idx mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: @x.Block (MOVHloadidx <v.Type> [off] {sym} ptr idx mem)
@@ -19263,6 +19320,19 @@ func rewriteValueS390X_OpS390XMOVWZreg_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWZreg (MOVWreg x))
+	// cond:
+	// result: (MOVWZreg x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpS390XMOVWreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpS390XMOVWZreg)
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVWZreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(uint32(c))])
@@ -19302,6 +19372,11 @@ func rewriteValueS390X_OpS390XMOVWZreg_0(v *Value) bool {
 		v0.AddArg(mem)
 		return true
 	}
+	return false
+}
+func rewriteValueS390X_OpS390XMOVWZreg_10(v *Value) bool {
+	b := v.Block
+	_ = b
 	// match: (MOVWZreg x:(MOVWload [off] {sym} ptr mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: @x.Block (MOVWZload <v.Type> [off] {sym} ptr mem)
@@ -19328,11 +19403,6 @@ func rewriteValueS390X_OpS390XMOVWZreg_0(v *Value) bool {
 		v0.AddArg(mem)
 		return true
 	}
-	return false
-}
-func rewriteValueS390X_OpS390XMOVWZreg_10(v *Value) bool {
-	b := v.Block
-	_ = b
 	// match: (MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem))
 	// cond: x.Uses == 1 && clobber(x)
 	// result: @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
@@ -19751,12 +19821,12 @@ func rewriteValueS390X_OpS390XMOVWreg_0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (MOVWreg x:(MOVHreg _))
+	// match: (MOVWreg x:(MOVHZreg _))
 	// cond:
 	// result: (MOVDreg x)
 	for {
 		x := v.Args[0]
-		if x.Op != OpS390XMOVHreg {
+		if x.Op != OpS390XMOVHZreg {
 			break
 		}
 		v.reset(OpS390XMOVDreg)
@@ -19780,6 +19850,19 @@ func rewriteValueS390X_OpS390XMOVWreg_10(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWreg (MOVWZreg x))
+	// cond:
+	// result: (MOVWreg x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpS390XMOVWZreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpS390XMOVWreg)
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVWreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(int32(c))])