]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize codes with arm64 REV16 instruction
authorfanzha02 <fannie.zhang@arm.com>
Wed, 20 May 2020 08:49:59 +0000 (08:49 +0000)
committerfannie zhang <Fannie.Zhang@arm.com>
Tue, 23 Mar 2021 01:36:23 +0000 (01:36 +0000)
Optimize some patterns into rev16/rev16w instruction.

Pattern1:
    (c & 0xff00ff00)>>8 | (c & 0x00ff00ff)<<8
To:
    rev16w c

Pattern2:
    (c & 0xff00ff00ff00ff00)>>8 | (c & 0x00ff00ff00ff00ff)<<8
To:
    rev16 c

This patch is a copy of CL 239637, contributed by Alice Xu(dianhong.xu@arm.com).

Change-Id: I96936c1db87618bc1903c04221c7e9b2779455b3
Reviewed-on: https://go-review.googlesource.com/c/go/+/268377
Trust: fannie zhang <Fannie.Zhang@arm.com>
Run-TryBot: fannie zhang <Fannie.Zhang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/compile/internal/test/testdata/arith_test.go
test/codegen/bitfield.go

index afd0d66d7254e03419c45e68ac47af1529dffed2..0c997bc4b3e82adaee8e20ea5bafaf29f6a01c7b 100644 (file)
@@ -915,6 +915,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpARM64FCVTDS,
                ssa.OpARM64REV,
                ssa.OpARM64REVW,
+               ssa.OpARM64REV16,
                ssa.OpARM64REV16W,
                ssa.OpARM64RBIT,
                ssa.OpARM64RBITW,
index 7f9f8298de8076f4b8818f187a5b1c767390dbeb..1d2efdabe0088051355ffa2bc0aa1e39552870ad 100644 (file)
                (CMPconst [64]  (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
        => (RORW x y)
 
+// rev16w | rev16
 // ((x>>8) | (x<<8)) => (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
 ((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) => (REV16W x)
 
+// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
+((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
+       && uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
+       => (REV16W x)
+
+// ((x & 0xff00ff00ff00ff00)>>8) | ((x & 0x00ff00ff00ff00ff)<<8), "|" can also be "^" or "+".
+((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       && (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
+       => (REV16 x)
+
+// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
+((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       && (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
+       => (REV16 (ANDconst <x.Type> [0xffffffff] x))
+
 // Extract from reg pair
 (ADDshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
 ( ORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
index 59a5ffaca4381bbda80cde365d9bcb0d25d10402..148843cd8daba78cd2f2215386dcc731d805a4e9 100644 (file)
@@ -239,6 +239,7 @@ func init() {
                {name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"},                              // sqrt(arg0), float32
                {name: "REV", argLength: 1, reg: gp11, asm: "REV"},                                    // byte reverse, 64-bit
                {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"},                                  // byte reverse, 32-bit
+               {name: "REV16", argLength: 1, reg: gp11, asm: "REV16"},                                // byte reverse in each 16-bit halfword, 64-bit
                {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"},                              // byte reverse in each 16-bit halfword, 32-bit
                {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"},                                  // bit reverse, 64-bit
                {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"},                                // bit reverse, 32-bit
index db51ed95c5c283d2ec654a923329fa994d45422b..3f84a09289a884572cb744bd2a2d140d6716fd01 100644 (file)
@@ -1366,6 +1366,7 @@ const (
        OpARM64FSQRTS
        OpARM64REV
        OpARM64REVW
+       OpARM64REV16
        OpARM64REV16W
        OpARM64RBIT
        OpARM64RBITW
@@ -18212,6 +18213,19 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "REV16",
+               argLen: 1,
+               asm:    arm64.AREV16,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:   "REV16W",
                argLen: 1,
index eb650b6a50a6cc0115288a117c59b17ad6ab47f6..c9613100186cd6d7e66558ccbbe10bc9575ec4ce 100644 (file)
@@ -1772,6 +1772,81 @@ func rewriteValueARM64_OpARM64ADDshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ADDshiftLL [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
+       // result: (REV16W x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 24) {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16W)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
+       // result: (REV16 x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ADDshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
+       // result: (REV16 (ANDconst <x.Type> [0xffffffff] x))
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16)
+               v0 := b.NewValue0(v.Pos, OpARM64ANDconst, x.Type)
+               v0.AuxInt = int64ToAuxInt(0xffffffff)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (ADDshiftLL [c] (SRLconst x [64-c]) x2)
        // result: (EXTRconst [64-c] x2 x)
        for {
@@ -17850,6 +17925,81 @@ func rewriteValueARM64_OpARM64ORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (ORshiftLL [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
+       // result: (REV16W x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 24) {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16W)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
+       // result: (REV16 x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (ORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
+       // result: (REV16 (ANDconst <x.Type> [0xffffffff] x))
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16)
+               v0 := b.NewValue0(v.Pos, OpARM64ANDconst, x.Type)
+               v0.AuxInt = int64ToAuxInt(0xffffffff)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: ( ORshiftLL [c] (SRLconst x [64-c]) x2)
        // result: (EXTRconst [64-c] x2 x)
        for {
@@ -21723,6 +21873,81 @@ func rewriteValueARM64_OpARM64XORshiftLL(v *Value) bool {
                v.AddArg(x)
                return true
        }
+       // match: (XORshiftLL [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
+       // result: (REV16W x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64UBFX || auxIntToArm64BitField(v_0.AuxInt) != armBFAuxInt(8, 24) {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16W)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
+       // result: (REV16 x)
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16)
+               v.AddArg(x)
+               return true
+       }
+       // match: (XORshiftLL [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
+       // cond: (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
+       // result: (REV16 (ANDconst <x.Type> [0xffffffff] x))
+       for {
+               if auxIntToInt64(v.AuxInt) != 8 || v_0.Op != OpARM64SRLconst || auxIntToInt64(v_0.AuxInt) != 8 {
+                       break
+               }
+               v_0_0 := v_0.Args[0]
+               if v_0_0.Op != OpARM64ANDconst {
+                       break
+               }
+               c1 := auxIntToInt64(v_0_0.AuxInt)
+               x := v_0_0.Args[0]
+               if v_1.Op != OpARM64ANDconst {
+                       break
+               }
+               c2 := auxIntToInt64(v_1.AuxInt)
+               if x != v_1.Args[0] || !(uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff) {
+                       break
+               }
+               v.reset(OpARM64REV16)
+               v0 := b.NewValue0(v.Pos, OpARM64ANDconst, x.Type)
+               v0.AuxInt = int64ToAuxInt(0xffffffff)
+               v0.AddArg(x)
+               v.AddArg(v0)
+               return true
+       }
        // match: (XORshiftLL [c] (SRLconst x [64-c]) x2)
        // result: (EXTRconst [64-c] x2 x)
        for {
index 158fedc28efec5fb67260f644b1693f03102539f..7d54a9181d1ef2e072c70e47a955ae017070325d 100644 (file)
@@ -1452,3 +1452,46 @@ func testDivisibility(t *testing.T) {
                }
        }
 }
+
+//go:noinline
+func genREV16_1(c uint64) uint64 {
+       b := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
+       return b
+}
+
+//go:noinline
+func genREV16_2(c uint64) uint64 {
+       b := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
+       return b
+}
+
+//go:noinline
+func genREV16W(c uint32) uint32 {
+       b := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
+       return b
+}
+
+func TestREV16(t *testing.T) {
+       x := uint64(0x8f7f6f5f4f3f2f1f)
+       want1 := uint64(0x7f8f5f6f3f4f1f2f)
+       want2 := uint64(0x3f4f1f2f)
+
+       got1 := genREV16_1(x)
+       if got1 != want1 {
+               t.Errorf("genREV16_1(%#x) = %#x want %#x", x, got1, want1)
+       }
+       got2 := genREV16_2(x)
+       if got2 != want2 {
+               t.Errorf("genREV16_2(%#x) = %#x want %#x", x, got2, want2)
+       }
+}
+
+func TestREV16W(t *testing.T) {
+       x := uint32(0x4f3f2f1f)
+       want := uint32(0x3f4f1f2f)
+
+       got := genREV16W(x)
+       if got != want {
+               t.Errorf("genREV16W(%#x) = %#x want %#x", x, got, want)
+       }
+}
index 7abc1c2783e54fa162a4a6001c1d1429c17cb296..8acefbd027b9f77025d52010cfbcdd2d0f9392c5 100644 (file)
@@ -244,3 +244,23 @@ func shift_no_cmp(x int) int {
        // mips64:`SLLV\t[$]17`,-`SGT`
        return x << 17
 }
+
+func rev16(c uint64) (uint64, uint64, uint64) {
+       // arm64:`REV16`,-`AND`,-`LSR`,-`AND`,-`ORR\tR[0-9]+<<8`
+       b1 := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
+       // arm64:-`ADD\tR[0-9]+<<8`
+       b2 := ((c & 0xff00ff00ff00ff00) >> 8) + ((c & 0x00ff00ff00ff00ff) << 8)
+       // arm64:-`EOR\tR[0-9]+<<8`
+       b3 := ((c & 0xff00ff00ff00ff00) >> 8) ^ ((c & 0x00ff00ff00ff00ff) << 8)
+       return b1, b2, b3
+}
+
+func rev16w(c uint32) (uint32, uint32, uint32) {
+       // arm64:`REV16W`,-`AND`,-`UBFX`,-`AND`,-`ORR\tR[0-9]+<<8`
+       b1 := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
+       // arm64:-`ADD\tR[0-9]+<<8`
+       b2 := ((c & 0xff00ff00) >> 8) + ((c & 0x00ff00ff) << 8)
+       // arm64:-`EOR\tR[0-9]+<<8`
+       b3 := ((c & 0xff00ff00) >> 8) ^ ((c & 0x00ff00ff) << 8)
+       return b1, b2, b3
+}