Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: add rotates to PPC64.rules
author Lynn Boger <laboger@linux.vnet.ibm.com>
Tue, 18 Apr 2017 21:05:31 +0000 (17:05 -0400)
committer Lynn Boger <laboger@linux.vnet.ibm.com>
Thu, 20 Apr 2017 18:05:22 +0000 (18:05 +0000)
This updates PPC64.rules to include rules that generate rotates
for ADD, OR, and XOR operations combining a constant left shift and
a constant right shift of the same value whose counts sum to 32 or 64.

To support this change, the opcodes ROTL and ROTLW were added; they
are used like the rotldi and rotlwi extended mnemonics.

This provides the following improvement in sha3 (columns: before,
after, speedup; higher is better):

BenchmarkPermutationFunction-8     302.83       376.40       1.24x
BenchmarkSha3_512_MTU-8            98.64        121.92       1.24x
BenchmarkSha3_384_MTU-8            136.80       168.30       1.23x
BenchmarkSha3_256_MTU-8            169.21       211.29       1.25x
BenchmarkSha3_224_MTU-8            179.76       221.19       1.23x
BenchmarkShake128_MTU-8            212.87       263.23       1.24x
BenchmarkShake256_MTU-8            196.62       245.60       1.25x
BenchmarkShake256_16x-8            163.57       194.37       1.19x
BenchmarkShake256_1MiB-8           199.02       248.74       1.25x
BenchmarkSha3_512_1MiB-8           106.55       133.13       1.25x

Fixes #20030

Change-Id: I484c56f48395d32f53ff3ecb3ac6cb8191cfee44
Reviewed-on: https://go-review.googlesource.com/40992
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/asm/internal/asm/testdata/ppc64.s
src/cmd/compile/internal/gc/asm_test.go
src/cmd/compile/internal/ppc64/ssa.go
src/cmd/compile/internal/ssa/gen/PPC64.rules
src/cmd/compile/internal/ssa/gen/PPC64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewritePPC64.go
src/cmd/internal/obj/ppc64/a.out.go
src/cmd/internal/obj/ppc64/anames.go
src/cmd/internal/obj/ppc64/asm9.go

index e26659305024661d73fadf8104a6be0a216e9e58..30fb0f2c02bbabb56c3e93cf322d4551413d6590 100644 (file)
@@ -581,6 +581,15 @@ label1:
 //     cmpb  RA,RS,RB
        CMPB  R2,R2,R1
 
+//
+// rotate extended mnemonics map onto other shift instructions
+//
+
+       ROTL    $12,R2,R3
+       ROTL    R2,R3,R4
+       ROTLW   $9,R2,R3
+       ROTLW   R2,R3,R4
+
 //
 // rotate and mask
 //
@@ -617,6 +626,17 @@ label1:
 
        RLDIMI  $7, R2, $52, R7
 
+// opcodes for right and left shifts, const and reg shift counts
+
+       SLD     $4, R3, R4
+       SLD     R2, R3, R4
+       SLW     $4, R3, R4
+       SLW     R2, R3, R4
+       SRD     $8, R3, R4
+       SRD     R2, R3, R4
+       SRW     $8, R3, R4
+       SRW     R2, R3, R4
+
 //
 // load/store multiple
 //
index d6174a9b190c3ecdf783272d8faff4991d94b430..0c0d8a2bc04e30959e4f0f59a28078f471847450 100644 (file)
@@ -1543,6 +1543,54 @@ var linuxPPC64LETests = []*asmTest{
                `,
                []string{"\tFMSUBS\t"},
        },
+       {
+               `
+               func f4(x uint32) uint32 {
+                       return x<<7 | x>>25
+               }
+               `,
+               []string{"\tROTLW\t"},
+       },
+       {
+               `
+               func f5(x uint32) uint32 {
+                       return x<<7 + x>>25
+               }
+               `,
+               []string{"\tROTLW\t"},
+       },
+       {
+               `
+               func f6(x uint32) uint32 {
+                       return x<<7 ^ x>>25
+               }
+               `,
+               []string{"\tROTLW\t"},
+       },
+       {
+               `
+               func f7(x uint64) uint64 {
+                       return x<<7 | x>>57
+               }
+               `,
+               []string{"\tROTL\t"},
+       },
+       {
+               `
+               func f8(x uint64) uint64 {
+                       return x<<7 + x>>57
+               }
+               `,
+               []string{"\tROTL\t"},
+       },
+       {
+               `
+               func f9(x uint64) uint64 {
+                       return x<<7 ^ x>>57
+               }
+               `,
+               []string{"\tROTL\t"},
+       },
 }
 
 // TestLineNumber checks to make sure the generated assembly has line numbers
index c2b561c49f07f5f99554127827680ba1fc78bcab..62aeee2fd676e08dc6029f7d6fff44db592a0663 100644 (file)
@@ -554,6 +554,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
 
+       case ssa.OpPPC64ROTLconst, ssa.OpPPC64ROTLWconst:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = v.AuxInt
+               p.Reg = v.Args[0].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
+
        case ssa.OpPPC64FMADD, ssa.OpPPC64FMADDS, ssa.OpPPC64FMSUB, ssa.OpPPC64FMSUBS:
                r := v.Reg()
                r1 := v.Args[0].Reg()
index a86d131c87a919c1eae9b85ec6fadabf13266769..90a574841dfd93f84432f4da1950458e1a2a0318 100644 (file)
 (ConstNil) -> (MOVDconst [0])
 (ConstBool [b]) -> (MOVDconst [b])
 
+// Rotate generation
+(ADD (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
+( OR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
+(XOR (SLDconst x [c]) (SRDconst x [d])) && d == 64-c -> (ROTLconst [c] x)
+
+(ADD (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
+( OR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
+(XOR (SLWconst x [c]) (SRWconst x [d])) && d == 32-c -> (ROTLWconst [c] x)
+
 (Lsh64x64  x (Const64 [c])) && uint64(c) < 64 -> (SLDconst x [c])
 (Rsh64x64  x (Const64 [c])) && uint64(c) < 64 -> (SRADconst x [c])
 (Rsh64Ux64 x (Const64 [c])) && uint64(c) < 64 -> (SRDconst x [c])
index 78de5c076451238cf9eb31731f0a818038508604..6cca77eca98e8d79162df03feb751695168d31a5 100644 (file)
@@ -195,6 +195,9 @@ func init() {
                {name: "SLDconst", argLength: 1, reg: gp11, asm: "SLD", aux: "Int64"},   // arg0 << aux, 64 bits
                {name: "SLWconst", argLength: 1, reg: gp11, asm: "SLW", aux: "Int64"},   // arg0 << aux, 32 bits
 
+               {name: "ROTLconst", argLength: 1, reg: gp11, asm: "ROTL", aux: "Int64"},   // arg0 rotate left by auxInt bits
+               {name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
+
                {name: "FDIV", argLength: 2, reg: fp21, asm: "FDIV"},   // arg0/arg1
                {name: "FDIVS", argLength: 2, reg: fp21, asm: "FDIVS"}, // arg0/arg1
 
index 9ff763431e11835d00ec92aa53cb3b8938d1c485..a7977fec955ce5d9a30eca7b6cb665bc94645982 100644 (file)
@@ -1292,6 +1292,8 @@ const (
        OpPPC64SRWconst
        OpPPC64SLDconst
        OpPPC64SLWconst
+       OpPPC64ROTLconst
+       OpPPC64ROTLWconst
        OpPPC64FDIV
        OpPPC64FDIVS
        OpPPC64DIVD
@@ -16537,6 +16539,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "ROTLconst",
+               auxType: auxInt64,
+               argLen:  1,
+               asm:     ppc64.AROTL,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
+       {
+               name:    "ROTLWconst",
+               auxType: auxInt64,
+               argLen:  1,
+               asm:     ppc64.AROTLW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+                       outputs: []outputInfo{
+                               {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+                       },
+               },
+       },
        {
                name:   "FDIV",
                argLen: 2,
index 4afb2763121aa587e72d450ffa8f9bd3f27e2b5a..490859c9be82b82c1ec9a596eef7c738e0109e1b 100644 (file)
@@ -4391,6 +4391,110 @@ func rewriteValuePPC64_OpOrB(v *Value) bool {
        }
 }
 func rewriteValuePPC64_OpPPC64ADD(v *Value) bool {
+       // match: (ADD (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (ROTLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SLDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SRDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADD (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (ROTLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SRDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SLDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADD (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (ROTLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SLWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADD (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (ROTLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SLWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
        // match: (ADD x (MOVDconst [c]))
        // cond: is32Bit(c)
        // result: (ADDconst [c] x)
@@ -7362,6 +7466,110 @@ func rewriteValuePPC64_OpPPC64NotEqual(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpPPC64OR(v *Value) bool {
+       // match: (OR (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (ROTLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SLDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SRDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (ROTLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SRDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SLDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (ROTLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SLWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (OR (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (ROTLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SLWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
        // match: (OR (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c|d])
@@ -7521,6 +7729,110 @@ func rewriteValuePPC64_OpPPC64SUB(v *Value) bool {
        return false
 }
 func rewriteValuePPC64_OpPPC64XOR(v *Value) bool {
+       // match: (XOR (SLDconst x [c]) (SRDconst x [d]))
+       // cond: d == 64-c
+       // result: (ROTLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SLDconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SRDconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (XOR (SRDconst x [d]) (SLDconst x [c]))
+       // cond: d == 64-c
+       // result: (ROTLconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SRDconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SLDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 64-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (XOR (SLWconst x [c]) (SRWconst x [d]))
+       // cond: d == 32-c
+       // result: (ROTLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SLWconst {
+                       break
+               }
+               c := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SRWconst {
+                       break
+               }
+               d := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       // match: (XOR (SRWconst x [d]) (SLWconst x [c]))
+       // cond: d == 32-c
+       // result: (ROTLWconst [c] x)
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpPPC64SRWconst {
+                       break
+               }
+               d := v_0.AuxInt
+               x := v_0.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpPPC64SLWconst {
+                       break
+               }
+               c := v_1.AuxInt
+               if x != v_1.Args[0] {
+                       break
+               }
+               if !(d == 32-c) {
+                       break
+               }
+               v.reset(OpPPC64ROTLWconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
        // match: (XOR (MOVDconst [c]) (MOVDconst [d]))
        // cond:
        // result: (MOVDconst [c^d])
index 15e143d12c88d61a97a7585ba3091cd581a3cece..90a204745b080c622301834e355ffdd22d16fb76 100644 (file)
@@ -680,6 +680,8 @@ const (
        ARLDCLCC
        ARLDICL
        ARLDICLCC
+       AROTL
+       AROTLW
        ASLBIA
        ASLBIE
        ASLBMFEE
index 01f4a7d41b1a6b4fc5dc8c7ff1bf7d40652d7285..5ca29454a60daebe43c2a2767646ad1f52e6d3b1 100644 (file)
@@ -301,6 +301,8 @@ var Anames = []string{
        "RLDCLCC",
        "RLDICL",
        "RLDICLCC",
+       "ROTL",
+       "ROTLW",
        "SLBIA",
        "SLBIE",
        "SLBMFEE",
index 033203bfd46cfdb8a00cf6333344383d2de49ff1..c835ef7f1789c1be9e0ca853b1ff18aaeb9d8b2b 100644 (file)
@@ -1655,11 +1655,13 @@ func buildop(ctxt *obj.Link) {
                        opset(ASLWCC, r0)
                        opset(ASRW, r0)
                        opset(ASRWCC, r0)
+                       opset(AROTLW, r0)
 
                case ASLD:
                        opset(ASLDCC, r0)
                        opset(ASRD, r0)
                        opset(ASRDCC, r0)
+                       opset(AROTL, r0)
 
                case ASRAW: /* sraw Rb,Rs,Ra; srawi sh,Rs,Ra */
                        opset(ASRAWCC, r0)
@@ -1971,10 +1973,12 @@ const (
        OP_ORI    = 24<<26 | 0<<1 | 0<<10 | 0
        OP_ORIS   = 25<<26 | 0<<1 | 0<<10 | 0
        OP_RLWINM = 21<<26 | 0<<1 | 0<<10 | 0
+       OP_RLWNM  = 23<<26 | 0<<1 | 0<<10 | 0
        OP_SUBF   = 31<<26 | 40<<1 | 0<<10 | 0
        OP_RLDIC  = 30<<26 | 4<<1 | 0<<10 | 0
        OP_RLDICR = 30<<26 | 2<<1 | 0<<10 | 0
        OP_RLDICL = 30<<26 | 0<<1 | 0<<10 | 0
+       OP_RLDCL  = 30<<26 | 8<<1 | 0<<10 | 0
 )
 
 func oclass(a *obj.Addr) int {
@@ -2258,7 +2262,15 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                if r == 0 {
                        r = int(p.To.Reg)
                }
-               o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+               // AROTL and AROTLW are extended mnemonics, which map to RLDCL and RLWNM.
+               switch p.As {
+               case AROTL:
+                       o1 = AOP_RLDIC(OP_RLDCL, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), uint32(0))
+               case AROTLW:
+                       o1 = OP_RLW(OP_RLWNM, uint32(p.To.Reg), uint32(r), uint32(p.From.Reg), 0, 31)
+               default:
+                       o1 = LOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
+               }
 
        case 7: /* mov r, soreg ==> stw o(r) */
                r := int(p.To.Reg)
@@ -2636,32 +2648,28 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                        r = int(p.To.Reg)
                }
                var a int
+               op := uint32(0)
                switch p.As {
                case ASLD, ASLDCC:
                        a = int(63 - v)
-                       o1 = OP_RLDICR
+                       op = OP_RLDICR
 
                case ASRD, ASRDCC:
                        a = int(v)
                        v = 64 - v
-                       o1 = OP_RLDICL
-
+                       op = OP_RLDICL
+               case AROTL:
+                       a = int(0)
+                       op = OP_RLDICL
                default:
                        c.ctxt.Diag("unexpected op in sldi case\n%v", p)
                        a = 0
                        o1 = 0
                }
 
-               o1 = AOP_RRR(o1, uint32(r), uint32(p.To.Reg), (uint32(v) & 0x1F))
-               o1 |= (uint32(a) & 31) << 6
-               if v&0x20 != 0 {
-                       o1 |= 1 << 1
-               }
-               if a&0x20 != 0 {
-                       o1 |= 1 << 5 /* mb[5] is top bit */
-               }
+               o1 = AOP_RLDIC(op, uint32(p.To.Reg), uint32(r), uint32(v), uint32(a))
                if p.As == ASLDCC || p.As == ASRDCC {
-                       o1 |= 1 /* Rc */
+                       o1 |= 1 // Set the condition code bit
                }
 
        case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
@@ -2978,18 +2986,18 @@ func (c *ctxt9) asmout(p *obj.Prog, o *Optab, out []uint32) {
                        v = 32
                }
                var mask [2]uint8
-               if p.As == ASRW || p.As == ASRWCC { /* shift right */
-                       mask[0] = uint8(v)
-                       mask[1] = 31
+               switch p.As {
+               case AROTLW:
+                       mask[0], mask[1] = 0, 31
+               case ASRW, ASRWCC:
+                       mask[0], mask[1] = uint8(v), 31
                        v = 32 - v
-               } else {
-                       mask[0] = 0
-                       mask[1] = uint8(31 - v)
+               default:
+                       mask[0], mask[1] = 0, uint8(31-v)
                }
-
                o1 = OP_RLW(OP_RLWINM, uint32(p.To.Reg), uint32(r), uint32(v), uint32(mask[0]), uint32(mask[1]))
                if p.As == ASLWCC || p.As == ASRWCC {
-                       o1 |= 1 /* Rc */
+                       o1 |= 1 // set the condition code
                }
 
        case 58: /* logical $andcon,[s],a */