From f98dd299103f66b86bbef402a63a0c6f8fc8b11e Mon Sep 17 00:00:00 2001
From: "Paul E. Murphy"
Date: Tue, 27 Sep 2022 13:13:10 -0500
Subject: [PATCH] cmd/compile: rework unbounded shift lowering on PPC64

This reduces unbounded shift latency by one cycle, and may generate
fewer instructions in some cases.

When there is a choice whether to use doubleword or word shifts, use
doubleword shifts. Doubleword shifts have fewer hardware scheduling
restrictions across P8/P9/P10.

Likewise, rework the shift sequence to allow the compare/shift/overshift
values to compute in parallel, then choose the correct value.

Some ANDCCconst rules also need to be reworked to ensure they simplify
when used for their flag value. This commonly occurs when prove fails to
identify a bounded shift (e.g. foo32<<(x&31)).

Reviewed-by: Lynn Boger
Reviewed-by: Than McIntosh
Run-TryBot: Paul Murphy
Reviewed-by: Ian Lance Taylor
---
 src/cmd/compile/internal/ssa/_gen/PPC64.rules |  102 +-
 src/cmd/compile/internal/ssa/rewritePPC64.go  | 1623 +++++++++--------
 src/cmd/compile/internal/test/shift_test.go   |   89 +
 3 files changed, 953 insertions(+), 861 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/PPC64.rules b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
index ba7347f31d..1c8142cd81 100644
--- a/src/cmd/compile/internal/ssa/_gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/PPC64.rules
@@ -172,61 +172,60 @@
 // Lower bounded shifts first. No need to check shift value.
 (Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y)
 (Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y)
-(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y)
-(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLW x y)
+(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y)
+(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SLD x y)
 (Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD x y)
 (Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW x y)
-(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW (MOVHZreg x) y)
-(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRW (MOVBZreg x) y)
+(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD (MOVHZreg x) y)
+(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SRD (MOVBZreg x) y)
 (Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD x y)
 (Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW x y)
-(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW (MOVHreg x) y)
-(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAW (MOVBreg x) y)
+(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD (MOVHreg x) y)
+(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SRAD (MOVBreg x) y)
+
+// Unbounded shifts. Go shifts saturate to 0 or -1 when shifting beyond the number of
+// bits in a type, PPC64 shifts do not (see the ISA for details).
+//
+// Note, y is always non-negative.
+//
+// Note, ISELZ is intentionally not used in lower. Where possible, ISEL is converted to ISELZ in late lower
+// after all the ISEL folding rules have been exercised.
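[Editorial sketch, not part of the patch: Go requires a shift by a count greater than or equal to the operand width to produce 0, or all sign bits for a signed right shift, while the PPC64 shift instructions decode only a few low-order bits of the count. The Go model below, with illustrative helper names lsh64 and rsh64, shows the selection the new Lsh64x64/Rsh64x64 rules perform.]

package main

import "fmt"

// lsh64 mirrors (ISEL [0] (SLD x y) (MOVDconst [0]) (CMPUconst y [64])):
// the shift, the overshift constant, and the compare carry no data
// dependence on one another, so the hardware may compute all three in
// parallel before ISEL selects the result.
func lsh64(x, y uint64) uint64 {
	shifted := uint64(0)
	if y < 64 {
		shifted = x << y // the SLD result; only consumed when y < 64
	}
	overshift := uint64(0) // MOVDconst [0]
	if y >= 64 {           // CMPUconst y [64]; ISEL [0] tests "less than"
		return overshift
	}
	return shifted
}

// rsh64 is the signed analogue: the overshift value is the sign bit
// replicated across the word, i.e. (SRADconst x [63]).
func rsh64(x int64, y uint64) int64 {
	shifted := int64(0)
	if y < 64 {
		shifted = x >> y // the SRAD result
	}
	overshift := x >> 63 // -1 if x < 0, else 0
	if y >= 64 {
		return overshift
	}
	return shifted
}

func main() {
	fmt.Println(lsh64(1, 64))  // 0: saturates instead of wrapping
	fmt.Println(rsh64(-8, 99)) // -1: saturates to the sign
}

The old sequence clamped the shift count first (an ISEL on the count feeding the shift), so the compare and select sat on the critical path ahead of the shift; selecting on the shift's result instead lets all three inputs issue together, which is where the one-cycle latency saving comes from.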
+
+((Rsh64U|Lsh64)x64 x y) => (ISEL [0] (S(R|L)D x y) (MOVDconst [0]) (CMPUconst y [64]))
+((Rsh64U|Lsh64)x32 x y) => (ISEL [0] (S(R|L)D x y) (MOVDconst [0]) (CMPWUconst y [64]))
+((Rsh64U|Lsh64)x16 x y) => (ISEL [2] (S(R|L)D x y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFC0] y)))
+((Rsh64U|Lsh64)x8 x y) => (ISEL [2] (S(R|L)D x y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00C0] y)))
+(Rsh64x(64|32) x y) => (ISEL [0] (SRAD x y) (SRADconst x [63]) (CMP(U|WU)const y [64]))
+(Rsh64x16 x y) => (ISEL [2] (SRAD x y) (SRADconst x [63]) (Select1 (ANDCCconst [0xFFC0] y)))
+(Rsh64x8 x y) => (ISEL [2] (SRAD x y) (SRADconst x [63]) (Select1 (ANDCCconst [0x00C0] y)))
+
+((Rsh32U|Lsh32)x64 x y) => (ISEL [0] (S(R|L)W x y) (MOVDconst [0]) (CMPUconst y [32]))
+((Rsh32U|Lsh32)x32 x y) => (ISEL [0] (S(R|L)W x y) (MOVDconst [0]) (CMPWUconst y [32]))
+((Rsh32U|Lsh32)x16 x y) => (ISEL [2] (S(R|L)W x y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFE0] y)))
+((Rsh32U|Lsh32)x8 x y) => (ISEL [2] (S(R|L)W x y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00E0] y)))
+(Rsh32x(64|32) x y) => (ISEL [0] (SRAW x y) (SRAWconst x [31]) (CMP(U|WU)const y [32]))
+(Rsh32x16 x y) => (ISEL [2] (SRAW x y) (SRAWconst x [31]) (Select1 (ANDCCconst [0xFFE0] y)))
+(Rsh32x8 x y) => (ISEL [2] (SRAW x y) (SRAWconst x [31]) (Select1 (ANDCCconst [0x00E0] y)))
+
+((Rsh16U|Lsh16)x64 x y) => (ISEL [0] (S(R|L)D (MOVHZreg x) y) (MOVDconst [0]) (CMPUconst y [16]))
+((Rsh16U|Lsh16)x32 x y) => (ISEL [0] (S(R|L)D (MOVHZreg x) y) (MOVDconst [0]) (CMPWUconst y [16]))
+((Rsh16U|Lsh16)x16 x y) => (ISEL [2] (S(R|L)D (MOVHZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFF0] y)))
+((Rsh16U|Lsh16)x8 x y) => (ISEL [2] (S(R|L)D (MOVHZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00F0] y)))
+(Rsh16x(64|32) x y) => (ISEL [0] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (CMP(U|WU)const y [16]))
+(Rsh16x16 x y) => (ISEL [2] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (Select1 (ANDCCconst [0xFFF0] y)))
+(Rsh16x8 x y) => (ISEL [2] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (Select1 (ANDCCconst [0x00F0] y)))
+
+((Rsh8U|Lsh8)x64 x y) => (ISEL [0] (S(R|L)D (MOVBZreg x) y) (MOVDconst [0]) (CMPUconst y [8]))
+((Rsh8U|Lsh8)x32 x y) => (ISEL [0] (S(R|L)D (MOVBZreg x) y) (MOVDconst [0]) (CMPWUconst y [8]))
+((Rsh8U|Lsh8)x16 x y) => (ISEL [2] (S(R|L)D (MOVBZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFF8] y)))
+((Rsh8U|Lsh8)x8 x y) => (ISEL [2] (S(R|L)D (MOVBZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00F8] y)))
+(Rsh8x(64|32) x y) => (ISEL [0] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (CMP(U|WU)const y [8]))
+(Rsh8x16 x y) => (ISEL [2] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (Select1 (ANDCCconst [0xFFF8] y)))
+(Rsh8x8 x y) => (ISEL [2] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (Select1 (ANDCCconst [0x00F8] y)))
+
+// Catch bounded shifts in situations like foo<<uint(shift&63) which might not be caught by the prove pass.
+(CMP(U|WU)const [d] (Select0 (ANDCCconst z [c]))) && uint64(d) > uint64(c) => (FlagLT)
-// non-constant rotates
-// If shift > 64 then use -1 as shift count to shift all bits.
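[Editorial sketch, not part of the patch: for the x16 and x8 variants above, no separate compare is issued. AND-ing the count against the complement of the legal range sets CR0 to EQ exactly when the count is in range. The helper name overshifts64 is illustrative; this models the 64-bit shift with a 16-bit count.]

package main

import "fmt"

// overshifts64 models Select1 (ANDCCconst [0xFFC0] y): the mask keeps
// every bit that would make y >= 64, so a nonzero result (CR0 = NE)
// means the shift is out of range and ISEL [2] must pick the overshift
// value instead of the SLD/SRD result.
func overshifts64(y uint16) bool {
	return y&0xFFC0 != 0
}

func main() {
	fmt.Println(overshifts64(63))     // false: in range, keep the shift
	fmt.Println(overshifts64(64))     // true: bit 0x0040 survives the mask
	fmt.Println(overshifts64(0x4000)) // true: high bits count too
}

This is also the situation the new (CMP(U|WU)const [d] (Select0 (ANDCCconst z [c]))) rule targets: a count already masked to at most c can never reach a larger d, so the compare folds statically to FlagLT.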
-((Lsh64|Rsh64|Rsh64U)x64 x y) => (S(L|RA|R)D x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) -((Rsh32|Rsh32U|Lsh32)x64 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) - -(Rsh(16|16U)x64 x y) => (SR(AW|W) ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) -(Lsh16x64 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) - -(Rsh(8|8U)x64 x y) => (SR(AW|W) ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8])))) -(Lsh8x64 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8])))) - -((Rsh64|Rsh64U|Lsh64)x32 x y) => (S(RA|R|L)D x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) -((Rsh32|Rsh32U|Lsh32)x32 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) - -(Rsh(16|16U)x32 x y) => (SR(AW|W) ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) -(Lsh16x32 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) - -(Rsh(8|8U)x32 x y) => (SR(AW|W) ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8])))) -(Lsh8x32 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8])))) - -((Rsh64|Rsh64U|Lsh64)x16 x y) => (S(RA|R|L)D x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64])))) - -((Rsh32|Rsh32U|Lsh32)x16 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32])))) - -(Rsh(16|16U)x16 x y) => (S(RA|R)W ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16])))) -(Lsh16x16 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16])))) - -(Rsh(8|8U)x16 x y) => (SR(AW|W) ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8])))) -(Lsh8x16 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8])))) - - -((Rsh64|Rsh64U|Lsh64)x8 x y) => (S(RA|R|L)D x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64])))) - -((Rsh32|Rsh32U|Lsh32)x8 x y) => (S(RA|R|L)W x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32])))) - -(Rsh(16|16U)x8 x y) => (S(RA|R)W ((Sign|Zero)Ext16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16])))) -(Lsh16x8 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16])))) - -(Rsh(8|8U)x8 x y) => (S(RA|R)W ((Sign|Zero)Ext8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8])))) -(Lsh8x8 x y) => (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8])))) - -// Cleaning up shift ops -(ISEL [0] (Select0 (ANDCCconst [d] y)) (MOVDconst [-1]) (CMPU (Select0 (ANDCCconst [d] y)) (MOVDconst [c]))) && c >= d => (Select0 (ANDCCconst [d] y)) -(ISEL [0] (Select0 (ANDCCconst [d] y)) (MOVDconst [-1]) (CMPUconst [c] (Select0 (ANDCCconst [d] y)))) && c >= d => (Select0 (ANDCCconst [d] y)) (ORN x (MOVDconst [-1])) => x (S(RAD|RD|LD) x (MOVDconst [c])) => (S(RAD|RD|LD)const [c&63 | (c>>6&1*63)] x) @@ -563,11 +562,12 @@ (OR x (MOVDconst [c])) && isU32Bit(c) => (ORconst [c] x) // Simplify consts -(Select0 (ANDCCconst [c] (Select0 (ANDCCconst [d] x)))) => (Select0 (ANDCCconst [c&d] x)) +(ANDCCconst [c] (Select0 (ANDCCconst [d] x))) => (ANDCCconst [c&d] x) (ORconst [c] (ORconst [d] x)) => (ORconst [c|d] x) (XORconst [c] (XORconst [d] x)) => (XORconst [c^d] x) (Select0 (ANDCCconst [-1] x)) => x (Select0 (ANDCCconst [0] _)) => (MOVDconst [0]) +(Select1 (ANDCCconst [0] _)) => (FlagEQ) 
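[Editorial sketch, not part of the patch: the reworked "Simplify consts" rules rest on constant masks composing. A quick Go check of the identity behind (ANDCCconst [c] (Select0 (ANDCCconst [d] x))) => (ANDCCconst [c&d] x); the constants here are arbitrary.]

package main

import "fmt"

// Masking twice with constants equals masking once with their
// intersection, so the fold preserves both the value and the CR0
// "compare against zero" flags of the outer ANDCCconst.
func main() {
	x := uint64(0xDEADBEEF)
	c, d := uint64(0xFFC0), uint64(0x00FF)
	fmt.Printf("%#x %#x %v\n", x&d&c, x&(c&d), x&d&c == x&(c&d)) // 0xc0 0xc0 true
	// With a zero mask the AND result is always zero, which is why
	// (Select1 (ANDCCconst [0] _)) can be rewritten to (FlagEQ).
	fmt.Println(x&0 == 0) // true
}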
(XORconst [0] x) => x (ORconst [-1] _) => (MOVDconst [-1]) (ORconst [0] x) => x diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go index bcd0a9ab4e..50c10fd9c4 100644 --- a/src/cmd/compile/internal/ssa/rewritePPC64.go +++ b/src/cmd/compile/internal/ssa/rewritePPC64.go @@ -441,6 +441,8 @@ func rewriteValuePPC64(v *Value) bool { return rewriteValuePPC64_OpPPC64ADDconst(v) case OpPPC64AND: return rewriteValuePPC64_OpPPC64AND(v) + case OpPPC64ANDCCconst: + return rewriteValuePPC64_OpPPC64ANDCCconst(v) case OpPPC64ANDN: return rewriteValuePPC64_OpPPC64ANDN(v) case OpPPC64CLRLSLDI: @@ -2484,35 +2486,37 @@ func rewriteValuePPC64_OpLsh16x16(v *Value) bool { typ := &b.Func.Config.Types // match: (Lsh16x16 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh16x16 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16])))) + // match: (Lsh16x16 x y) + // result: (ISEL [2] (SLD (MOVHZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFF0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) - v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(16) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0xFFF0) + v4.AddArg(y) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -2523,33 +2527,35 @@ func rewriteValuePPC64_OpLsh16x32(v *Value) bool { typ := &b.Func.Config.Types // match: (Lsh16x32 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh16x32 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) + // match: (Lsh16x32 x y) + // result: (ISEL [0] (SLD (MOVHZreg x) y) (MOVDconst [0]) (CMPWUconst y [16])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(16) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, 
types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -2577,33 +2583,35 @@ func rewriteValuePPC64_OpLsh16x64(v *Value) bool { } // match: (Lsh16x64 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh16x64 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) + // match: (Lsh16x64 x y) + // result: (ISEL [0] (SLD (MOVHZreg x) y) (MOVDconst [0]) (CMPUconst y [16])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) v3.AuxInt = int64ToAuxInt(16) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -2614,35 +2622,37 @@ func rewriteValuePPC64_OpLsh16x8(v *Value) bool { typ := &b.Func.Config.Types // match: (Lsh16x8 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh16x8 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16])))) + // match: (Lsh16x8 x y) + // result: (ISEL [2] (SLD (MOVHZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00F0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) - v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(16) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0x00F0) + v4.AddArg(y) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -2664,24 +2674,24 @@ func rewriteValuePPC64_OpLsh32x16(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh32x16 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32])))) + // match: (Lsh32x16 x y) + // result: (ISEL [2] (SLW x y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFE0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + 
v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v3.AuxInt = int64ToAuxInt(0xFFE0) v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(32) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v2.AddArg(v3) + v.AddArg3(v0, v1, v2) return true } } @@ -2703,22 +2713,22 @@ func rewriteValuePPC64_OpLsh32x32(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh32x32 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) + // match: (Lsh32x32 x y) + // result: (ISEL [0] (SLW x y) (MOVDconst [0]) (CMPWUconst y [32])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(32) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v.AddArg3(v0, v1, v2) return true } } @@ -2757,22 +2767,22 @@ func rewriteValuePPC64_OpLsh32x64(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh32x64 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) + // match: (Lsh32x64 x y) + // result: (ISEL [0] (SLW x y) (MOVDconst [0]) (CMPUconst y [32])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(32) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) + v2.AuxInt = int64ToAuxInt(32) + v2.AddArg(y) + v.AddArg3(v0, v1, v2) return true } } @@ -2794,24 +2804,24 @@ func rewriteValuePPC64_OpLsh32x8(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh32x8 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32])))) + // match: (Lsh32x8 x y) + // result: (ISEL [2] (SLW x y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00E0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, 
OpZeroExt8to64, typ.UInt64) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v3.AuxInt = int64ToAuxInt(0x00E0) v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(32) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v2.AddArg(v3) + v.AddArg3(v0, v1, v2) return true } } @@ -2833,24 +2843,24 @@ func rewriteValuePPC64_OpLsh64x16(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh64x16 x y) - // result: (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64])))) + // match: (Lsh64x16 x y) + // result: (ISEL [2] (SLD x y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFC0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLD) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v3.AuxInt = int64ToAuxInt(0xFFC0) v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(64) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v2.AddArg(v3) + v.AddArg3(v0, v1, v2) return true } } @@ -2872,22 +2882,22 @@ func rewriteValuePPC64_OpLsh64x32(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh64x32 x y) - // result: (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) + // match: (Lsh64x32 x y) + // result: (ISEL [0] (SLD x y) (MOVDconst [0]) (CMPWUconst y [64])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLD) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(64) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(64) + v2.AddArg(y) + v.AddArg3(v0, v1, v2) return true } } @@ -2926,22 +2936,22 @@ func rewriteValuePPC64_OpLsh64x64(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh64x64 x y) - // result: (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64])))) + // match: (Lsh64x64 x y) + // result: (ISEL [0] (SLD x y) (MOVDconst [0]) (CMPUconst y [64])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLD) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(64) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) 
- v.AddArg2(x, v0) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) + v2.AuxInt = int64ToAuxInt(64) + v2.AddArg(y) + v.AddArg3(v0, v1, v2) return true } } @@ -2963,24 +2973,24 @@ func rewriteValuePPC64_OpLsh64x8(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Lsh64x8 x y) - // result: (SLD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64])))) + // match: (Lsh64x8 x y) + // result: (ISEL [2] (SLD x y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00C0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLD) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v3.AuxInt = int64ToAuxInt(0x00C0) v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(64) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v2.AddArg(v3) + v.AddArg3(v0, v1, v2) return true } } @@ -2991,35 +3001,37 @@ func rewriteValuePPC64_OpLsh8x16(v *Value) bool { typ := &b.Func.Config.Types // match: (Lsh8x16 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh8x16 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8])))) + // match: (Lsh8x16 x y) + // result: (ISEL [2] (SLD (MOVBZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFF8] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) - v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(8) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0xFFF8) + v4.AddArg(y) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -3030,33 +3042,35 @@ func rewriteValuePPC64_OpLsh8x32(v *Value) bool { typ := &b.Func.Config.Types // match: (Lsh8x32 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh8x32 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8])))) + // match: (Lsh8x32 x y) + // result: (ISEL [0] (SLD (MOVBZreg x) y) (MOVDconst [0]) (CMPWUconst y 
[8])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(8) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(8) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -3084,33 +3098,35 @@ func rewriteValuePPC64_OpLsh8x64(v *Value) bool { } // match: (Lsh8x64 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh8x64 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8])))) + // match: (Lsh8x64 x y) + // result: (ISEL [0] (SLD (MOVBZreg x) y) (MOVDconst [0]) (CMPUconst y [8])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) v3.AuxInt = int64ToAuxInt(8) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -3121,35 +3137,37 @@ func rewriteValuePPC64_OpLsh8x8(v *Value) bool { typ := &b.Func.Config.Types // match: (Lsh8x8 x y) // cond: shiftIsBounded(v) - // result: (SLW x y) + // result: (SLD x y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SLW) + v.reset(OpPPC64SLD) v.AddArg2(x, y) return true } - // match: (Lsh8x8 x y) - // result: (SLW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8])))) + // match: (Lsh8x8 x y) + // result: (ISEL [2] (SLD (MOVBZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00F8] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SLW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) - v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) - v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(8) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SLD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) + v2.AuxInt = int64ToAuxInt(0) + v3 := 
b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0x00F8) + v4.AddArg(y) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -4162,6 +4180,28 @@ func rewriteValuePPC64_OpPPC64AND(v *Value) bool { } return false } +func rewriteValuePPC64_OpPPC64ANDCCconst(v *Value) bool { + v_0 := v.Args[0] + // match: (ANDCCconst [c] (Select0 (ANDCCconst [d] x))) + // result: (ANDCCconst [c&d] x) + for { + c := auxIntToInt64(v.AuxInt) + if v_0.Op != OpSelect0 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpPPC64ANDCCconst { + break + } + d := auxIntToInt64(v_0_0.AuxInt) + x := v_0_0.Args[0] + v.reset(OpPPC64ANDCCconst) + v.AuxInt = int64ToAuxInt(c & d) + v.AddArg(x) + return true + } + return false +} func rewriteValuePPC64_OpPPC64ANDN(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -4339,11 +4379,30 @@ func rewriteValuePPC64_OpPPC64CMPU(v *Value) bool { } func rewriteValuePPC64_OpPPC64CMPUconst(v *Value) bool { v_0 := v.Args[0] - // match: (CMPUconst (MOVDconst [x]) [y]) - // cond: x==y - // result: (FlagEQ) + // match: (CMPUconst [d] (Select0 (ANDCCconst z [c]))) + // cond: uint64(d) > uint64(c) + // result: (FlagLT) for { - y := auxIntToInt64(v.AuxInt) + d := auxIntToInt64(v.AuxInt) + if v_0.Op != OpSelect0 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpPPC64ANDCCconst { + break + } + c := auxIntToInt64(v_0_0.AuxInt) + if !(uint64(d) > uint64(c)) { + break + } + v.reset(OpPPC64FlagLT) + return true + } + // match: (CMPUconst (MOVDconst [x]) [y]) + // cond: x==y + // result: (FlagEQ) + for { + y := auxIntToInt64(v.AuxInt) if v_0.Op != OpPPC64MOVDconst { break } @@ -4550,6 +4609,25 @@ func rewriteValuePPC64_OpPPC64CMPWU(v *Value) bool { } func rewriteValuePPC64_OpPPC64CMPWUconst(v *Value) bool { v_0 := v.Args[0] + // match: (CMPWUconst [d] (Select0 (ANDCCconst z [c]))) + // cond: uint64(d) > uint64(c) + // result: (FlagLT) + for { + d := auxIntToInt32(v.AuxInt) + if v_0.Op != OpSelect0 { + break + } + v_0_0 := v_0.Args[0] + if v_0_0.Op != OpPPC64ANDCCconst { + break + } + c := auxIntToInt64(v_0_0.AuxInt) + if !(uint64(d) > uint64(c)) { + break + } + v.reset(OpPPC64FlagLT) + return true + } // match: (CMPWUconst (MOVDconst [x]) [y]) // cond: int32(x)==int32(y) // result: (FlagEQ) @@ -5379,78 +5457,6 @@ func rewriteValuePPC64_OpPPC64ISEL(v *Value) bool { v_0 := v.Args[0] b := v.Block typ := &b.Func.Config.Types - // match: (ISEL [0] (Select0 (ANDCCconst [d] y)) (MOVDconst [-1]) (CMPU (Select0 (ANDCCconst [d] y)) (MOVDconst [c]))) - // cond: c >= d - // result: (Select0 (ANDCCconst [d] y)) - for { - if auxIntToInt32(v.AuxInt) != 0 || v_0.Op != OpSelect0 { - break - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpPPC64ANDCCconst { - break - } - d := auxIntToInt64(v_0_0.AuxInt) - y := v_0_0.Args[0] - if v_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 || v_2.Op != OpPPC64CMPU { - break - } - _ = v_2.Args[1] - v_2_0 := v_2.Args[0] - if v_2_0.Op != OpSelect0 { - break - } - v_2_0_0 := v_2_0.Args[0] - if v_2_0_0.Op != OpPPC64ANDCCconst || auxIntToInt64(v_2_0_0.AuxInt) != d || y != v_2_0_0.Args[0] { - break - } - v_2_1 := v_2.Args[1] - if v_2_1.Op != OpPPC64MOVDconst { - break - } - c := auxIntToInt64(v_2_1.AuxInt) - if !(c >= d) { - break - } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) - v0.AuxInt = int64ToAuxInt(d) - v0.AddArg(y) - v.AddArg(v0) - return true - } - // match: (ISEL [0] (Select0 
(ANDCCconst [d] y)) (MOVDconst [-1]) (CMPUconst [c] (Select0 (ANDCCconst [d] y)))) - // cond: c >= d - // result: (Select0 (ANDCCconst [d] y)) - for { - if auxIntToInt32(v.AuxInt) != 0 || v_0.Op != OpSelect0 { - break - } - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpPPC64ANDCCconst { - break - } - d := auxIntToInt64(v_0_0.AuxInt) - y := v_0_0.Args[0] - if v_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1.AuxInt) != -1 || v_2.Op != OpPPC64CMPUconst { - break - } - c := auxIntToInt64(v_2.AuxInt) - v_2_0 := v_2.Args[0] - if v_2_0.Op != OpSelect0 { - break - } - v_2_0_0 := v_2_0.Args[0] - if v_2_0_0.Op != OpPPC64ANDCCconst || auxIntToInt64(v_2_0_0.AuxInt) != d || y != v_2_0_0.Args[0] || !(c >= d) { - break - } - v.reset(OpSelect0) - v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) - v0.AuxInt = int64ToAuxInt(d) - v0.AddArg(y) - v.AddArg(v0) - return true - } // match: (ISEL [6] x y (Select1 (ANDCCconst [1] (SETBC [c] cmp)))) // result: (ISEL [c] x y cmp) for { @@ -14003,39 +14009,39 @@ func rewriteValuePPC64_OpRsh16Ux16(v *Value) bool { typ := &b.Func.Config.Types // match: (Rsh16Ux16 x y) // cond: shiftIsBounded(v) - // result: (SRW (MOVHZreg x) y) + // result: (SRD (MOVHZreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRW) + v.reset(OpPPC64SRD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16Ux16 x y) - // result: (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16])))) + // match: (Rsh16Ux16 x y) + // result: (ISEL [2] (SRD (MOVHZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFF0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SRD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0xFFF0) v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v5.AuxInt = int64ToAuxInt(16) - v3.AddArg2(v4, v5) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -14046,37 +14052,37 @@ func rewriteValuePPC64_OpRsh16Ux32(v *Value) bool { typ := &b.Func.Config.Types // match: (Rsh16Ux32 x y) // cond: shiftIsBounded(v) - // result: (SRW (MOVHZreg x) y) + // result: (SRD (MOVHZreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRW) + v.reset(OpPPC64SRD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16Ux32 x y) - // result: (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) + // match: (Rsh16Ux32 x y) + // result: (ISEL [0] (SRD (MOVHZreg x) y) (MOVDconst [0]) (CMPWUconst y [16])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, 
typ.Int32) - v1.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SRD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(16) - v3.AddArg2(y, v4) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -14106,37 +14112,37 @@ func rewriteValuePPC64_OpRsh16Ux64(v *Value) bool { } // match: (Rsh16Ux64 x y) // cond: shiftIsBounded(v) - // result: (SRW (MOVHZreg x) y) + // result: (SRD (MOVHZreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRW) + v.reset(OpPPC64SRD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16Ux64 x y) - // result: (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) + // match: (Rsh16Ux64 x y) + // result: (ISEL [0] (SRD (MOVHZreg x) y) (MOVDconst [0]) (CMPUconst y [16])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SRD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(16) - v3.AddArg2(y, v4) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) + v3.AuxInt = int64ToAuxInt(16) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -14147,39 +14153,39 @@ func rewriteValuePPC64_OpRsh16Ux8(v *Value) bool { typ := &b.Func.Config.Types // match: (Rsh16Ux8 x y) // cond: shiftIsBounded(v) - // result: (SRW (MOVHZreg x) y) + // result: (SRD (MOVHZreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRW) + v.reset(OpPPC64SRD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16Ux8 x y) - // result: (SRW (ZeroExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16])))) + // match: (Rsh16Ux8 x y) + // result: (ISEL [2] (SRD (MOVHZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00F0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpZeroExt16to32, typ.UInt32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SRD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v2.AuxInt = int64ToAuxInt(0) + v3 := b.NewValue0(v.Pos, OpSelect1, 
types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0x00F0) v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v5.AuxInt = int64ToAuxInt(16) - v3.AddArg2(v4, v5) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -14190,39 +14196,40 @@ func rewriteValuePPC64_OpRsh16x16(v *Value) bool { typ := &b.Func.Config.Types // match: (Rsh16x16 x y) // cond: shiftIsBounded(v) - // result: (SRAW (MOVHreg x) y) + // result: (SRAD (MOVHreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRAW) + v.reset(OpPPC64SRAD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16x16 x y) - // result: (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [16])))) + // match: (Rsh16x16 x y) + // result: (ISEL [2] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (Select1 (ANDCCconst [0xFFF0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRAW) - v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) - v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t) + v2.AuxInt = int64ToAuxInt(15) + v2.AddArg(v1) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0xFFF0) v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v5.AuxInt = int64ToAuxInt(16) - v3.AddArg2(v4, v5) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v3.AddArg(v4) + v.AddArg3(v0, v2, v3) return true } } @@ -14233,37 +14240,38 @@ func rewriteValuePPC64_OpRsh16x32(v *Value) bool { typ := &b.Func.Config.Types // match: (Rsh16x32 x y) // cond: shiftIsBounded(v) - // result: (SRAW (MOVHreg x) y) + // result: (SRAD (MOVHreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRAW) + v.reset(OpPPC64SRAD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16x32 x y) - // result: (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) + // match: (Rsh16x32 x y) + // result: (ISEL [0] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (CMPWUconst y [16])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRAW) - v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) - v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(16) - v3.AddArg2(y, v4) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, 
OpPPC64SRADconst, t) + v2.AuxInt = int64ToAuxInt(15) + v2.AddArg(v1) + v3 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags) + v3.AuxInt = int32ToAuxInt(16) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -14312,37 +14320,38 @@ func rewriteValuePPC64_OpRsh16x64(v *Value) bool { } // match: (Rsh16x64 x y) // cond: shiftIsBounded(v) - // result: (SRAW (MOVHreg x) y) + // result: (SRAD (MOVHreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRAW) + v.reset(OpPPC64SRAD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16x64 x y) - // result: (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [16])))) + // match: (Rsh16x64 x y) + // result: (ISEL [0] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (CMPUconst y [16])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRAW) - v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) - v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(16) - v3.AddArg2(y, v4) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t) + v2.AuxInt = int64ToAuxInt(15) + v2.AddArg(v1) + v3 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) + v3.AuxInt = int64ToAuxInt(16) + v3.AddArg(y) + v.AddArg3(v0, v2, v3) return true } } @@ -14353,39 +14362,40 @@ func rewriteValuePPC64_OpRsh16x8(v *Value) bool { typ := &b.Func.Config.Types // match: (Rsh16x8 x y) // cond: shiftIsBounded(v) - // result: (SRAW (MOVHreg x) y) + // result: (SRAD (MOVHreg x) y) for { x := v_0 y := v_1 if !(shiftIsBounded(v)) { break } - v.reset(OpPPC64SRAW) + v.reset(OpPPC64SRAD) v0 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) v0.AddArg(x) v.AddArg2(v0, y) return true } - // match: (Rsh16x8 x y) - // result: (SRAW (SignExt16to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [16])))) + // match: (Rsh16x8 x y) + // result: (ISEL [2] (SRAD (MOVHreg x) y) (SRADconst (MOVHreg x) [15]) (Select1 (ANDCCconst [0x00F0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRAW) - v0 := b.NewValue0(v.Pos, OpSignExt16to32, typ.Int32) - v0.AddArg(x) - v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v1.AuxInt = int32ToAuxInt(0) - v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v2.AuxInt = int64ToAuxInt(-1) - v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t) + v1 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64) + v1.AddArg(x) + v0.AddArg2(v1, y) + v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t) + v2.AuxInt = int64ToAuxInt(15) + v2.AddArg(v1) + v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v4.AuxInt = int64ToAuxInt(0x00F0) v4.AddArg(y) - v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v5.AuxInt = int64ToAuxInt(16) - v3.AddArg2(v4, v5) - v1.AddArg3(y, v2, v3) - v.AddArg2(v0, v1) + v3.AddArg(v4) + 
v.AddArg3(v0, v2, v3) return true } } @@ -14407,24 +14417,24 @@ func rewriteValuePPC64_OpRsh32Ux16(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Rsh32Ux16 x y) - // result: (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32])))) + // match: (Rsh32Ux16 x y) + // result: (ISEL [2] (SRW x y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFE0] y))) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(2) + v0 := b.NewValue0(v.Pos, OpPPC64SRW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags) + v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags)) + v3.AuxInt = int64ToAuxInt(0xFFE0) v3.AddArg(y) - v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v4.AuxInt = int64ToAuxInt(32) - v2.AddArg2(v3, v4) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v2.AddArg(v3) + v.AddArg3(v0, v1, v2) return true } } @@ -14446,22 +14456,22 @@ func rewriteValuePPC64_OpRsh32Ux32(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Rsh32Ux32 x y) - // result: (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) + // match: (Rsh32Ux32 x y) + // result: (ISEL [0] (SRW x y) (MOVDconst [0]) (CMPWUconst y [32])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SRW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(32) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags) + v2.AuxInt = int32ToAuxInt(32) + v2.AddArg(y) + v.AddArg3(v0, v1, v2) return true } } @@ -14500,22 +14510,22 @@ func rewriteValuePPC64_OpRsh32Ux64(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Rsh32Ux64 x y) - // result: (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32])))) + // match: (Rsh32Ux64 x y) + // result: (ISEL [0] (SRW x y) (MOVDconst [0]) (CMPUconst y [32])) for { + t := v.Type x := v_0 y := v_1 - v.reset(OpPPC64SRW) - v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32) - v0.AuxInt = int32ToAuxInt(0) + v.reset(OpPPC64ISEL) + v.AuxInt = int32ToAuxInt(0) + v0 := b.NewValue0(v.Pos, OpPPC64SRW, t) + v0.AddArg2(x, y) v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v1.AuxInt = int64ToAuxInt(-1) - v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags) - v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64) - v3.AuxInt = int64ToAuxInt(32) - v2.AddArg2(y, v3) - v0.AddArg3(y, v1, v2) - v.AddArg2(x, v0) + v1.AuxInt = int64ToAuxInt(0) + v2 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags) + v2.AuxInt = int64ToAuxInt(32) + v2.AddArg(y) + v.AddArg3(v0, v1, v2) return true } } @@ -14537,24 +14547,24 @@ func rewriteValuePPC64_OpRsh32Ux8(v *Value) bool { v.AddArg2(x, y) return true } - // match: (Rsh32Ux8 x y) - // result: (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) 
(MOVDconst [32]))))
+	// match: (Rsh32Ux8 x y)
+	// result: (ISEL [2] (SRW x y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00E0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRW)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRW, t)
+		v0.AddArg2(x, y)
 		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(0)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0x00E0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(32)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14576,24 +14586,25 @@ func rewriteValuePPC64_OpRsh32x16(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh32x16 x y)
-	// result: (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [32]))))
+	// match: (Rsh32x16 x y)
+	// result: (ISEL [2] (SRAW x y) (SRAWconst x [31]) (Select1 (ANDCCconst [0xFFE0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAW, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRAWconst, t)
+		v1.AuxInt = int64ToAuxInt(31)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0xFFE0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(32)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14601,7 +14612,6 @@ func rewriteValuePPC64_OpRsh32x32(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	typ := &b.Func.Config.Types
 	// match: (Rsh32x32 x y)
 	// cond: shiftIsBounded(v)
 	// result: (SRAW x y)
@@ -14615,22 +14625,23 @@ func rewriteValuePPC64_OpRsh32x32(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh32x32 x y)
-	// result: (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
+	// match: (Rsh32x32 x y)
+	// result: (ISEL [0] (SRAW x y) (SRAWconst x [31]) (CMPWUconst y [32]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v3.AuxInt = int64ToAuxInt(32)
-		v2.AddArg2(y, v3)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAW, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRAWconst, t)
+		v1.AuxInt = int64ToAuxInt(31)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags)
+		v2.AuxInt = int32ToAuxInt(32)
+		v2.AddArg(y)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14638,7 +14649,6 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	typ := &b.Func.Config.Types
 	// match: (Rsh32x64 x (MOVDconst [c]))
 	// cond: uint64(c) >= 32
 	// result: (SRAWconst x [63])
@@ -14686,22 +14696,23 @@ func rewriteValuePPC64_OpRsh32x64(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh32x64 x y)
-	// result: (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
+	// match: (Rsh32x64 x y)
+	// result: (ISEL [0] (SRAW x y) (SRAWconst x [31]) (CMPUconst y [32]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v3.AuxInt = int64ToAuxInt(32)
-		v2.AddArg2(y, v3)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAW, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRAWconst, t)
+		v1.AuxInt = int64ToAuxInt(31)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags)
+		v2.AuxInt = int64ToAuxInt(32)
+		v2.AddArg(y)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14723,24 +14734,25 @@ func rewriteValuePPC64_OpRsh32x8(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh32x8 x y)
-	// result: (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [32]))))
+	// match: (Rsh32x8 x y)
+	// result: (ISEL [2] (SRAW x y) (SRAWconst x [31]) (Select1 (ANDCCconst [0x00E0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAW, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRAWconst, t)
+		v1.AuxInt = int64ToAuxInt(31)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0x00E0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(32)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14762,24 +14774,24 @@ func rewriteValuePPC64_OpRsh64Ux16(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64Ux16 x y)
-	// result: (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64]))))
+	// match: (Rsh64Ux16 x y)
+	// result: (ISEL [2] (SRD x y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFC0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v0.AddArg2(x, y)
 		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(0)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0xFFC0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14801,22 +14813,22 @@ func rewriteValuePPC64_OpRsh64Ux32(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64Ux32 x y)
-	// result: (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
+	// match: (Rsh64Ux32 x y)
+	// result: (ISEL [0] (SRD x y) (MOVDconst [0]) (CMPWUconst y [64]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v0.AddArg2(x, y)
 		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v3.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(y, v3)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v1.AuxInt = int64ToAuxInt(0)
+		v2 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags)
+		v2.AuxInt = int32ToAuxInt(64)
+		v2.AddArg(y)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14855,22 +14867,22 @@ func rewriteValuePPC64_OpRsh64Ux64(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64Ux64 x y)
-	// result: (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
+	// match: (Rsh64Ux64 x y)
+	// result: (ISEL [0] (SRD x y) (MOVDconst [0]) (CMPUconst y [64]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v0.AddArg2(x, y)
 		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v3.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(y, v3)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v1.AuxInt = int64ToAuxInt(0)
+		v2 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags)
+		v2.AuxInt = int64ToAuxInt(64)
+		v2.AddArg(y)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14892,24 +14904,24 @@ func rewriteValuePPC64_OpRsh64Ux8(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64Ux8 x y)
-	// result: (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64]))))
+	// match: (Rsh64Ux8 x y)
+	// result: (ISEL [2] (SRD x y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00C0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v0.AddArg2(x, y)
 		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v1.AuxInt = int64ToAuxInt(0)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0x00C0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14931,24 +14943,25 @@ func rewriteValuePPC64_OpRsh64x16(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64x16 x y)
-	// result: (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [64]))))
+	// match: (Rsh64x16 x y)
+	// result: (ISEL [2] (SRAD x y) (SRADconst x [63]) (Select1 (ANDCCconst [0xFFC0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v1.AuxInt = int64ToAuxInt(63)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0xFFC0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14956,7 +14969,6 @@ func rewriteValuePPC64_OpRsh64x32(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	typ := &b.Func.Config.Types
 	// match: (Rsh64x32 x y)
 	// cond: shiftIsBounded(v)
 	// result: (SRAD x y)
@@ -14970,22 +14982,23 @@ func rewriteValuePPC64_OpRsh64x32(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64x32 x y)
-	// result: (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
+	// match: (Rsh64x32 x y)
+	// result: (ISEL [0] (SRAD x y) (SRADconst x [63]) (CMPWUconst y [64]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v3.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(y, v3)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v1.AuxInt = int64ToAuxInt(63)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags)
+		v2.AuxInt = int32ToAuxInt(64)
+		v2.AddArg(y)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -14993,7 +15006,6 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	typ := &b.Func.Config.Types
 	// match: (Rsh64x64 x (MOVDconst [c]))
 	// cond: uint64(c) >= 64
 	// result: (SRADconst x [63])
@@ -15041,22 +15053,23 @@ func rewriteValuePPC64_OpRsh64x64(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64x64 x y)
-	// result: (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
+	// match: (Rsh64x64 x y)
+	// result: (ISEL [0] (SRAD x y) (SRADconst x [63]) (CMPUconst y [64]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v3.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(y, v3)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v1.AuxInt = int64ToAuxInt(63)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags)
+		v2.AuxInt = int64ToAuxInt(64)
+		v2.AddArg(y)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -15078,24 +15091,25 @@ func rewriteValuePPC64_OpRsh64x8(v *Value) bool {
 		v.AddArg2(x, y)
 		return true
 	}
-	// match: (Rsh64x8 x y)
-	// result: (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [64]))))
+	// match: (Rsh64x8 x y)
+	// result: (ISEL [2] (SRAD x y) (SRADconst x [63]) (Select1 (ANDCCconst [0x00C0] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAD)
-		v0 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v0.AuxInt = int32ToAuxInt(0)
-		v1 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v1.AuxInt = int64ToAuxInt(-1)
-		v2 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v3 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v0.AddArg2(x, y)
+		v1 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v1.AuxInt = int64ToAuxInt(63)
+		v1.AddArg(x)
+		v2 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v3 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v3.AuxInt = int64ToAuxInt(0x00C0)
 		v3.AddArg(y)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(64)
-		v2.AddArg2(v3, v4)
-		v0.AddArg3(y, v1, v2)
-		v.AddArg2(x, v0)
+		v2.AddArg(v3)
+		v.AddArg3(v0, v1, v2)
 		return true
 	}
 }
@@ -15106,39 +15120,39 @@ func rewriteValuePPC64_OpRsh8Ux16(v *Value) bool {
 	typ := &b.Func.Config.Types
 	// match: (Rsh8Ux16 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRW (MOVBZreg x) y)
+	// result: (SRD (MOVBZreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRW)
+		v.reset(OpPPC64SRD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8Ux16 x y)
-	// result: (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
+	// match: (Rsh8Ux16 x y)
+	// result: (ISEL [2] (SRD (MOVBZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0xFFF8] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRW)
-		v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
 		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v2.AuxInt = int64ToAuxInt(0)
+		v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v4.AuxInt = int64ToAuxInt(0xFFF8)
 		v4.AddArg(y)
-		v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v5.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(v4, v5)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v3.AddArg(v4)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15149,37 +15163,37 @@ func rewriteValuePPC64_OpRsh8Ux32(v *Value) bool {
 	typ := &b.Func.Config.Types
 	// match: (Rsh8Ux32 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRW (MOVBZreg x) y)
+	// result: (SRD (MOVBZreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRW)
+		v.reset(OpPPC64SRD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8Ux32 x y)
-	// result: (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
+	// match: (Rsh8Ux32 x y)
+	// result: (ISEL [0] (SRD (MOVBZreg x) y) (MOVDconst [0]) (CMPWUconst y [8]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRW)
-		v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
 		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(y, v4)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v2.AuxInt = int64ToAuxInt(0)
+		v3 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags)
+		v3.AuxInt = int32ToAuxInt(8)
+		v3.AddArg(y)
+		v.AddArg3(v0, v2, v3)
 		return true
	}
 }
@@ -15209,37 +15223,37 @@ func rewriteValuePPC64_OpRsh8Ux64(v *Value) bool {
 	}
 	// match: (Rsh8Ux64 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRW (MOVBZreg x) y)
+	// result: (SRD (MOVBZreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRW)
+		v.reset(OpPPC64SRD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8Ux64 x y)
-	// result: (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
+	// match: (Rsh8Ux64 x y)
+	// result: (ISEL [0] (SRD (MOVBZreg x) y) (MOVDconst [0]) (CMPUconst y [8]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRW)
-		v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
 		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(y, v4)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v2.AuxInt = int64ToAuxInt(0)
+		v3 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags)
+		v3.AuxInt = int64ToAuxInt(8)
+		v3.AddArg(y)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15250,39 +15264,39 @@ func rewriteValuePPC64_OpRsh8Ux8(v *Value) bool {
 	typ := &b.Func.Config.Types
 	// match: (Rsh8Ux8 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRW (MOVBZreg x) y)
+	// result: (SRD (MOVBZreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRW)
+		v.reset(OpPPC64SRD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8Ux8 x y)
-	// result: (SRW (ZeroExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
+	// match: (Rsh8Ux8 x y)
+	// result: (ISEL [2] (SRD (MOVBZreg x) y) (MOVDconst [0]) (Select1 (ANDCCconst [0x00F8] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRW)
-		v0 := b.NewValue0(v.Pos, OpZeroExt8to32, typ.UInt32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
 		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v2.AuxInt = int64ToAuxInt(0)
+		v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v4.AuxInt = int64ToAuxInt(0x00F8)
 		v4.AddArg(y)
-		v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v5.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(v4, v5)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v3.AddArg(v4)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15293,39 +15307,40 @@ func rewriteValuePPC64_OpRsh8x16(v *Value) bool {
 	typ := &b.Func.Config.Types
 	// match: (Rsh8x16 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRAW (MOVBreg x) y)
+	// result: (SRAD (MOVBreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRAW)
+		v.reset(OpPPC64SRAD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8x16 x y)
-	// result: (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt16to64 y) (MOVDconst [8]))))
+	// match: (Rsh8x16 x y)
+	// result: (ISEL [2] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (Select1 (ANDCCconst [0xFFF8] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpZeroExt16to64, typ.UInt64)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
+		v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v2.AuxInt = int64ToAuxInt(7)
+		v2.AddArg(v1)
+		v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v4.AuxInt = int64ToAuxInt(0xFFF8)
 		v4.AddArg(y)
-		v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v5.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(v4, v5)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v3.AddArg(v4)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15336,37 +15351,38 @@ func rewriteValuePPC64_OpRsh8x32(v *Value) bool {
 	typ := &b.Func.Config.Types
 	// match: (Rsh8x32 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRAW (MOVBreg x) y)
+	// result: (SRAD (MOVBreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRAW)
+		v.reset(OpPPC64SRAD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8x32 x y)
-	// result: (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
+	// match: (Rsh8x32 x y)
+	// result: (ISEL [0] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (CMPWUconst y [8]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(y, v4)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
+		v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v2.AuxInt = int64ToAuxInt(7)
+		v2.AddArg(v1)
+		v3 := b.NewValue0(v.Pos, OpPPC64CMPWUconst, types.TypeFlags)
+		v3.AuxInt = int32ToAuxInt(8)
+		v3.AddArg(y)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15415,37 +15431,38 @@ func rewriteValuePPC64_OpRsh8x64(v *Value) bool {
 	}
 	// match: (Rsh8x64 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRAW (MOVBreg x) y)
+	// result: (SRAD (MOVBreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRAW)
+		v.reset(OpPPC64SRAD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8x64 x y)
-	// result: (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [8]))))
+	// match: (Rsh8x64 x y)
+	// result: (ISEL [0] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (CMPUconst y [8]))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v4.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(y, v4)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(0)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
+		v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v2.AuxInt = int64ToAuxInt(7)
+		v2.AddArg(v1)
+		v3 := b.NewValue0(v.Pos, OpPPC64CMPUconst, types.TypeFlags)
+		v3.AuxInt = int64ToAuxInt(8)
+		v3.AddArg(y)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15456,39 +15473,40 @@ func rewriteValuePPC64_OpRsh8x8(v *Value) bool {
 	typ := &b.Func.Config.Types
 	// match: (Rsh8x8 x y)
 	// cond: shiftIsBounded(v)
-	// result: (SRAW (MOVBreg x) y)
+	// result: (SRAD (MOVBreg x) y)
 	for {
 		x := v_0
 		y := v_1
 		if !(shiftIsBounded(v)) {
 			break
 		}
-		v.reset(OpPPC64SRAW)
+		v.reset(OpPPC64SRAD)
 		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
 		v0.AddArg(x)
 		v.AddArg2(v0, y)
 		return true
 	}
-	// match: (Rsh8x8 x y)
-	// result: (SRAW (SignExt8to32 x) (ISEL [0] y (MOVDconst [-1]) (CMPU (ZeroExt8to64 y) (MOVDconst [8]))))
+	// match: (Rsh8x8 x y)
+	// result: (ISEL [2] (SRAD (MOVBreg x) y) (SRADconst (MOVBreg x) [7]) (Select1 (ANDCCconst [0x00F8] y)))
 	for {
+		t := v.Type
 		x := v_0
 		y := v_1
-		v.reset(OpPPC64SRAW)
-		v0 := b.NewValue0(v.Pos, OpSignExt8to32, typ.Int32)
-		v0.AddArg(x)
-		v1 := b.NewValue0(v.Pos, OpPPC64ISEL, typ.Int32)
-		v1.AuxInt = int32ToAuxInt(0)
-		v2 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v2.AuxInt = int64ToAuxInt(-1)
-		v3 := b.NewValue0(v.Pos, OpPPC64CMPU, types.TypeFlags)
-		v4 := b.NewValue0(v.Pos, OpZeroExt8to64, typ.UInt64)
+		v.reset(OpPPC64ISEL)
+		v.AuxInt = int32ToAuxInt(2)
+		v0 := b.NewValue0(v.Pos, OpPPC64SRAD, t)
+		v1 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v1.AddArg(x)
+		v0.AddArg2(v1, y)
+		v2 := b.NewValue0(v.Pos, OpPPC64SRADconst, t)
+		v2.AuxInt = int64ToAuxInt(7)
+		v2.AddArg(v1)
+		v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+		v4 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
+		v4.AuxInt = int64ToAuxInt(0x00F8)
 		v4.AddArg(y)
-		v5 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v5.AuxInt = int64ToAuxInt(8)
-		v3.AddArg2(v4, v5)
-		v1.AddArg3(y, v2, v3)
-		v.AddArg2(v0, v1)
+		v3.AddArg(v4)
+		v.AddArg3(v0, v2, v3)
 		return true
 	}
 }
@@ -15636,30 +15654,6 @@ func rewriteValuePPC64_OpSelect0(v *Value) bool {
 		v.AddArg(x)
 		return true
 	}
-	// match: (Select0 (ANDCCconst [c] (Select0 (ANDCCconst [d] x))))
-	// result: (Select0 (ANDCCconst [c&d] x))
-	for {
-		if v_0.Op != OpPPC64ANDCCconst {
-			break
-		}
-		c := auxIntToInt64(v_0.AuxInt)
-		v_0_0 := v_0.Args[0]
-		if v_0_0.Op != OpSelect0 {
-			break
-		}
-		v_0_0_0 := v_0_0.Args[0]
-		if v_0_0_0.Op != OpPPC64ANDCCconst {
-			break
-		}
-		d := auxIntToInt64(v_0_0_0.AuxInt)
-		x := v_0_0_0.Args[0]
-		v.reset(OpSelect0)
-		v0 := b.NewValue0(v.Pos, OpPPC64ANDCCconst, types.NewTuple(typ.Int, types.TypeFlags))
-		v0.AuxInt = int64ToAuxInt(c & d)
-		v0.AddArg(x)
-		v.AddArg(v0)
-		return true
-	}
 	// match: (Select0 (ANDCCconst [-1] x))
 	// result: x
 	for {
@@ -15954,6 +15948,15 @@ func rewriteValuePPC64_OpSelect1(v *Value) bool {
 		v.copyOf(x)
 		return true
 	}
+	// match: (Select1 (ANDCCconst [0] _))
+	// result: (FlagEQ)
+	for {
+		if v_0.Op != OpPPC64ANDCCconst || auxIntToInt64(v_0.AuxInt) != 0 {
+			break
+		}
+		v.reset(OpPPC64FlagEQ)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpSelectN(v *Value) bool {
diff --git a/src/cmd/compile/internal/test/shift_test.go b/src/cmd/compile/internal/test/shift_test.go
index 278a47da29..dd893a1121 100644
--- a/src/cmd/compile/internal/test/shift_test.go
+++ b/src/cmd/compile/internal/test/shift_test.go
@@ -1061,3 +1061,92 @@ func TestIncorrectRotate(t *testing.T) {
 		t.Errorf("got %x want 0", got)
 	}
 }
+
+//go:noinline
+func variableShiftOverflow64x8(x int64, y, z uint8) (a, b, c int64) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int64(uint64(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow32x8(x int32, y, z uint8) (a, b, c int32) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int32(uint32(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow16x8(x int16, y, z uint8) (a, b, c int16) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int16(uint16(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow8x8(x int8, y, z uint8) (a, b, c int8) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int8(uint8(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow64x16(x int64, y, z uint16) (a, b, c int64) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int64(uint64(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow32x16(x int32, y, z uint16) (a, b, c int32) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int32(uint32(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow16x16(x int16, y, z uint16) (a, b, c int16) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int16(uint16(x) >> (y + z))
+}
+
+//go:noinline
+func variableShiftOverflow8x16(x int8, y, z uint16) (a, b, c int8) {
+	// Verify junk bits are ignored when doing a variable shift.
+	return x >> (y + z), x << (y + z), int8(uint8(x) >> (y + z))
+}
+
+//go:noinline
+func makeU8(x uint64) uint8 {
+	// Ensure the upper portions of the register are clear before testing large shift values
+	// using non-native types (e.g. uint8 on PPC64).
+	return uint8(x)
+}
+
+//go:noinline
+func makeU16(x uint64) uint16 {
+	// Ensure the upper portions of the register are clear before testing large shift values
+	// using non-native types (e.g. uint16 on PPC64).
+	return uint16(x)
+}
+
+func TestShiftOverflow(t *testing.T) {
+	if v, w, z := variableShiftOverflow64x8(-64, makeU8(255), 2); v != -32 || w != -128 || z != 0x7fffffffffffffe0 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x7fffffffffffffe0", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow32x8(-64, makeU8(255), 2); v != -32 || w != -128 || z != 0x7fffffe0 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x7fffffe0", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow16x8(-64, makeU8(255), 2); v != -32 || w != -128 || z != 0x7fe0 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x7fe0", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow8x8(-64, makeU8(255), 2); v != -32 || w != -128 || z != 0x60 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x60", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow64x16(-64, makeU16(0xffff), 2); v != -32 || w != -128 || z != 0x7fffffffffffffe0 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x7fffffffffffffe0", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow32x16(-64, makeU16(0xffff), 2); v != -32 || w != -128 || z != 0x7fffffe0 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x7fffffe0", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow16x16(-64, makeU16(0xffff), 2); v != -32 || w != -128 || z != 0x7fe0 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x7fe0", v, w, z)
+	}
+	if v, w, z := variableShiftOverflow8x16(-64, makeU16(0xffff), 2); v != -32 || w != -128 || z != 0x60 {
+		t.Errorf("got %d %d 0x%x, expected -32 -128 0x60", v, w, z)
+	}
+}
-- 
2.50.0
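
Appendix (editorial, not part of the patch): the ISEL sequences above compute the shift, the saturated "overshift" value, and the count test in parallel, then select one result. For the 8- and 16-bit count variants, the comparison against the type width is replaced by an and-with-mask: for a 16-bit count y, (y & 0xFFC0) != 0 holds exactly when y >= 64, which is what the (ANDCCconst [0xFFC0] y) flag input to ISEL [2] checks. The standalone Go sketch below models that selection in source form; the function names are invented for illustration, and Go itself already guarantees these results (the lowering exists precisely to implement them), so this only spells out the machine-level pattern:

	package main

	import "fmt"

	// shr64u models the unbounded Rsh64Ux16 lowering: SRD computes the shift
	// from the low bits of the count, ANDCCconst [0xFFC0] tests y >= 64, and
	// ISEL picks the MOVDconst [0] value on overshift.
	func shr64u(x uint64, y uint16) uint64 {
		shifted := x >> (y & 63) // SRD result (count taken mod the register width)
		if y&0xFFC0 != 0 {       // equivalent to y >= 64 for a 16-bit count
			return 0 // unsigned overshift saturates to 0
		}
		return shifted
	}

	// shr64s models the unbounded Rsh64x16 lowering: on overshift the result
	// is the sign fill, i.e. what SRADconst x [63] produces (0 or -1).
	func shr64s(x int64, y uint16) int64 {
		shifted := x >> (y & 63)
		if y&0xFFC0 != 0 {
			return x >> 63 // all sign bits
		}
		return shifted
	}

	func main() {
		fmt.Println(shr64u(1, 0), shr64u(1, 64))     // 1 0
		fmt.Println(shr64s(-64, 2), shr64s(-64, 70)) // -16 -1
	}

Selecting between precomputed values, rather than clamping the shift count before the shift as the old lowering did, is what removes a cycle from the dependency chain: the compare/and, the shift, and the saturated value have no ordering among themselves, so only the final ISEL waits on all of them.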