Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: optimize ARM64 code with CMN/TST
author Balaram Makam <bmakam.qdt@qualcommdatacenter.com>
Tue, 24 Apr 2018 11:17:40 +0000 (07:17 -0400)
committer Cherry Zhang <cherryyz@google.com>
Thu, 26 Apr 2018 14:13:12 +0000 (14:13 +0000)
Use CMN/TST to simplify comparisons. This can reduce the
register pressure by removing single def/use registers for example:
ADDW R0, R1, R8 -> CMNW R1, R0 ; CMN is an alias of ADDS.
CBZW R8, label  -> BEQ  label  ; single def/use of R8 removed.

Little change in performance of go1 benchmark on Amberwing:
name                   old time/op    new time/op    delta
RegexpMatchEasy0_32       247ns ± 0%     246ns ± 0%  -0.40%  (p=0.008 n=5+5)
RegexpMatchEasy0_1K       581ns ± 0%     580ns ± 0%    ~     (p=0.079 n=4+5)
RegexpMatchEasy1_32       244ns ± 0%     243ns ± 0%  -0.41%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K       804ns ± 0%     806ns ± 0%  +0.25%  (p=0.016 n=5+4)
RegexpMatchMedium_32      313ns ± 0%     311ns ± 0%  -0.64%  (p=0.008 n=5+5)
RegexpMatchMedium_1K     52.2µs ± 0%    51.9µs ± 0%  -0.51%  (p=0.008 n=5+5)
RegexpMatchHard_32       2.76µs ± 3%    2.74µs ± 0%    ~     (p=0.683 n=5+5)
RegexpMatchHard_1K       78.8µs ± 0%    78.9µs ± 0%  +0.04%  (p=0.008 n=5+5)
FmtFprintfEmpty          58.6ns ± 0%    57.7ns ± 0%  -1.54%  (p=0.008 n=5+5)
FmtFprintfString          118ns ± 0%     115ns ± 0%  -2.54%  (p=0.008 n=5+5)
FmtFprintfInt             119ns ± 0%     119ns ± 0%    ~     (all equal)
FmtFprintfIntInt          192ns ± 0%     192ns ± 0%    ~     (all equal)
FmtFprintfPrefixedInt     224ns ± 0%     205ns ± 0%  -8.48%  (p=0.008 n=5+5)
FmtFprintfFloat           336ns ± 0%     333ns ± 1%    ~     (p=0.683 n=5+5)
FmtManyArgs               779ns ± 1%     760ns ± 1%  -2.41%  (p=0.008 n=5+5)
Gzip                      437ms ± 0%     436ms ± 0%  -0.27%  (p=0.008 n=5+5)
HTTPClientServer         90.1µs ± 1%    91.1µs ± 0%  +1.19%  (p=0.008 n=5+5)
JSONEncode               20.1ms ± 0%    20.2ms ± 1%    ~     (p=0.690 n=5+5)
JSONDecode               94.5ms ± 1%    94.1ms ± 1%    ~     (p=0.095 n=5+5)
Mandelbrot200            5.37ms ± 0%    5.37ms ± 0%    ~     (p=0.421 n=5+5)
TimeParse                 450ns ± 0%     446ns ± 0%  -0.89%  (p=0.000 n=5+4)
TimeFormat                483ns ± 1%     473ns ± 0%  -2.19%  (p=0.008 n=5+5)
Template                 90.6ms ± 0%    89.7ms ± 0%  -0.93%  (p=0.008 n=5+5)
GoParse                  5.97ms ± 0%    6.01ms ± 0%  +0.65%  (p=0.008 n=5+5)
BinaryTree17              11.8s ± 0%     11.7s ± 0%  -0.28%  (p=0.016 n=5+5)
Revcomp                   669ms ± 0%     669ms ± 0%    ~     (p=0.222 n=5+5)
Fannkuch11                3.28s ± 0%     3.34s ± 0%  +1.72%  (p=0.016 n=4+5)
[Geo mean]               46.6µs         46.3µs       -0.74%

name                   old speed      new speed      delta
RegexpMatchEasy0_32     129MB/s ± 0%   130MB/s ± 0%  +0.32%  (p=0.016 n=5+4)
RegexpMatchEasy0_1K    1.76GB/s ± 0%  1.76GB/s ± 0%  +0.13%  (p=0.016 n=4+5)
RegexpMatchEasy1_32     131MB/s ± 0%   132MB/s ± 0%  +0.32%  (p=0.008 n=5+5)
RegexpMatchEasy1_1K    1.27GB/s ± 0%  1.27GB/s ± 0%  -0.24%  (p=0.016 n=5+4)
RegexpMatchMedium_32   3.19MB/s ± 0%  3.21MB/s ± 0%  +0.63%  (p=0.008 n=5+5)
RegexpMatchMedium_1K   19.6MB/s ± 0%  19.7MB/s ± 0%  +0.51%  (p=0.029 n=4+4)
RegexpMatchHard_32     11.6MB/s ± 2%  11.7MB/s ± 0%    ~     (p=1.000 n=5+5)
RegexpMatchHard_1K     13.0MB/s ± 0%  13.0MB/s ± 0%    ~     (p=0.079 n=4+5)
Gzip                   44.4MB/s ± 0%  44.5MB/s ± 0%  +0.27%  (p=0.008 n=5+5)
JSONEncode             96.4MB/s ± 0%  96.2MB/s ± 1%    ~     (p=0.579 n=5+5)
JSONDecode             20.5MB/s ± 1%  20.6MB/s ± 1%    ~     (p=0.111 n=5+5)
Template               21.4MB/s ± 0%  21.6MB/s ± 0%  +0.94%  (p=0.008 n=5+5)
GoParse                9.70MB/s ± 0%  9.63MB/s ± 0%  -0.68%  (p=0.016 n=4+5)
Revcomp                 380MB/s ± 0%   380MB/s ± 0%    ~     (p=0.222 n=5+5)
[Geo mean]             55.3MB/s       55.4MB/s       +0.23%

Change-Id: I2e5338138991d9bc984e67b51212aa5d1b0f2a6b
Reviewed-on: https://go-review.googlesource.com/97335
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>

src/cmd/asm/internal/asm/testdata/arm64enc.s
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/internal/obj/arm64/asm7.go
src/cmd/internal/obj/arm64/obj7.go

index 11d82d8166b1d4e37751bdcc04ca94c2bc459ef3..5fa9af99fa8da1bf9c4eeb54264201ebbc7bad53 100644 (file)
@@ -33,7 +33,12 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
    AND $34903429696192636, R12, R19           // 93910e92
    ANDW R9@>7, R19, R26                       // 7a1ec90a
    AND R9@>7, R19, R26                        // 7a1ec98a
-   //TODO TST $2863311530, R24                // 1ff32972
+   TSTW $2863311530, R24                      // 1ff30172
+   TST R2, R0                                 // 1f0002ea
+   TST $7, R2                                 // 5f0840f2
+   ANDS R2, R0, ZR                            // 1f0002ea
+   ANDS $7, R2, ZR                            // 5f0840f2
+   ANDSW $2863311530, R24, ZR                 // 1ff30172
    ANDSW $2863311530, R24, R23                // 17f30172
    ANDS $-140737488289793, R2, R5             // 458051f2
    ANDSW R26->24, R21, R15                    // af629a6a
@@ -374,10 +379,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
    SXTW R0, R27                               // 1b7c4093
    SYSL $285440, R12                          // 0c5b2cd5
    //TODO TLBI
-   //TODO TST $0x80000007, R9                 // 3f0d0172
-   //TODO TST $0xfffffff0, LR                 // df6f7cf2
-   //TODO TSTW R10@>21, R2                    // 1f2f11ea
-   //TODO TST R17<<11, R24                    // 1f2f11ea
+   TSTW $0x80000007, R9                       // TSTW $2147483655, R9          // 3f0d0172
+   TST $0xfffffff0, LR                        // TST $4294967280, R30          // df6f7cf2
+   TSTW R10@>21, R2                           // 5f54ca6a
+   TST R17<<11, R24                           // 1f2f11ea
+   ANDSW $0x80000007, R9, ZR                  // ANDSW   $2147483655, R9, ZR   // 3f0d0172
+   ANDS $0xfffffff0, LR, ZR                   // ANDS    $4294967280, R30, ZR  // df6f7cf2
+   ANDSW R10@>21, R2, ZR                      // 5f54ca6a
+   ANDS R17<<11, R24, ZR                      // 1f2f11ea
    UBFIZW $3, R19, $14, R14                   // 6e361d53
    UBFIZ $3, R22, $14, R4                     // c4367dd3
    UBFXW $3, R7, $20, R15                     // ef580353
index ea3fe7a094294e2d6e3eb72f96fa77e8a6b52fe2..c7aea631c0dd07cf98351152a3985adad14f1cd3 100644 (file)
@@ -276,6 +276,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                ssa.OpARM64CMPW,
                ssa.OpARM64CMN,
                ssa.OpARM64CMNW,
+               ssa.OpARM64TST,
+               ssa.OpARM64TSTW,
                ssa.OpARM64FCMPS,
                ssa.OpARM64FCMPD:
                p := s.Prog(v.Op.Asm())
@@ -285,7 +287,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        case ssa.OpARM64CMPconst,
                ssa.OpARM64CMPWconst,
                ssa.OpARM64CMNconst,
-               ssa.OpARM64CMNWconst:
+               ssa.OpARM64CMNWconst,
+               ssa.OpARM64TSTconst,
+               ssa.OpARM64TSTWconst:
                p := s.Prog(v.Op.Asm())
                p.From.Type = obj.TYPE_CONST
                p.From.Offset = v.AuxInt
index d8753414d94707ed0b8c0e5d5c9e8a5d502d1506..ff1f290542219452b32c5602120a4e23d916fc74 100644 (file)
 (NZ (GreaterEqual cc) yes no) -> (GE cc yes no)
 (NZ (GreaterEqualU cc) yes no) -> (UGE cc yes no)
 
+(EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTWconst [c] y) yes no)
+(NE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTWconst [c] y) yes no)
+(LT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTWconst [c] y) yes no)
+(LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTWconst [c] y) yes no)
+(GT (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTWconst [c] y) yes no)
+(GE (CMPWconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTWconst [c] y) yes no)
+
+(EQ (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TST x y) yes no)
+(NE (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TST x y) yes no)
+(EQ (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (EQ (TST x y) yes no)
+(NE (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 -> (NE (TST x y) yes no)
+
+(EQ (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (EQ (TSTconst [c] y) yes no)
+(NE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (NE (TSTconst [c] y) yes no)
+(LT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LT (TSTconst [c] y) yes no)
+(LE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (LE (TSTconst [c] y) yes no)
+(GT (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GT (TSTconst [c] y) yes no)
+(GE (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 -> (GE (TSTconst [c] y) yes no)
+
+(EQ (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
+(NE (CMPconst [0] z:(ADD x y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
+(EQ (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (EQ (CMN x y) yes no)
+(NE (CMP x z:(NEG y)) yes no) && z.Uses == 1 -> (NE (CMN x y) yes no)
+
 (EQ (CMPconst [0] x) yes no) -> (Z x yes no)
 (NE (CMPconst [0] x) yes no) -> (NZ x yes no)
 (EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
 (NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
 (ZW  (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ  {ntz(int64(uint32(c)))} x yes no)
 (NZW (ANDconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
+(EQ (TSTconst [c] x) yes no) && oneBit(c) -> (TBZ  {ntz(c)} x yes no)
+(NE (TSTconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
+(EQ (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBZ  {ntz(int64(uint32(c)))} x yes no)
+(NE (TSTWconst [c] x) yes no) && oneBit(int64(uint32(c))) -> (TBNZ {ntz(int64(uint32(c)))} x yes no)
 
 // Test sign-bit for signed comparisons against zero
 (GE (CMPWconst [0] x) yes no) -> (TBZ  {int64(31)} x yes no)
 (AND x (MOVDconst [c])) -> (ANDconst [c] x)
 (OR  x (MOVDconst [c])) -> (ORconst  [c] x)
 (XOR x (MOVDconst [c])) -> (XORconst [c] x)
+(TST x (MOVDconst [c])) -> (TSTconst [c] x)
+(CMN x (MOVDconst [c])) -> (CMNconst [c] x)
 (BIC x (MOVDconst [c])) -> (ANDconst [^c] x)
 (EON x (MOVDconst [c])) -> (XORconst [^c] x)
 (ORN x (MOVDconst [c])) -> (ORconst  [^c] x)
 (CMPWconst (MOVDconst [x]) [y]) && int32(x)<int32(y) && uint32(x)>uint32(y) -> (FlagLT_UGT)
 (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)<uint32(y) -> (FlagGT_ULT)
 (CMPWconst (MOVDconst [x]) [y]) && int32(x)>int32(y) && uint32(x)>uint32(y) -> (FlagGT_UGT)
+(TSTconst  (MOVDconst [x]) [y]) && int64(x&y)==0 -> (FlagEQ)
+(TSTconst  (MOVDconst [x]) [y]) && int64(x&y)<0  -> (FlagLT_UGT)
+(TSTconst  (MOVDconst [x]) [y]) && int64(x&y)>0  -> (FlagGT_UGT)
+(TSTWconst (MOVDconst [x]) [y]) && int32(x&y)==0 -> (FlagEQ)
+(TSTWconst (MOVDconst [x]) [y]) && int32(x&y)<0  -> (FlagLT_UGT)
+(TSTWconst (MOVDconst [x]) [y]) && int32(x&y)>0  -> (FlagGT_UGT)
+(CMNconst  (MOVDconst [x]) [y]) && int64(x)==int64(-y) -> (FlagEQ)
+(CMNconst  (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)<uint64(-y) -> (FlagLT_ULT)
+(CMNconst  (MOVDconst [x]) [y]) && int64(x)<int64(-y) && uint64(x)>uint64(-y) -> (FlagLT_UGT)
+(CMNconst  (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)<uint64(-y) -> (FlagGT_ULT)
+(CMNconst  (MOVDconst [x]) [y]) && int64(x)>int64(-y) && uint64(x)>uint64(-y) -> (FlagGT_UGT)
+(CMNWconst (MOVDconst [x]) [y]) && int32(x)==int32(-y) -> (FlagEQ)
+(CMNWconst  (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)<uint32(-y) -> (FlagLT_ULT)
+(CMNWconst  (MOVDconst [x]) [y]) && int32(x)<int32(-y) && uint32(x)>uint32(-y) -> (FlagLT_UGT)
+(CMNWconst  (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)<uint32(-y) -> (FlagGT_ULT)
+(CMNWconst  (MOVDconst [x]) [y]) && int32(x)>int32(-y) && uint32(x)>uint32(-y) -> (FlagGT_UGT)
+
 
 // other known comparisons
 (CMPconst (MOVBUreg _) [c]) && 0xff < c -> (FlagLT_ULT)
index c90d1439cdd7785bf35075b1728736d3bd4ea46d..5ee984027bdc3a649602ee967360565e2aafd6b6 100644 (file)
@@ -255,6 +255,10 @@ func init() {
                {name: "CMNconst", argLength: 1, reg: gp1flags, asm: "CMN", aux: "Int64", typ: "Flags"},   // arg0 compare to -auxInt
                {name: "CMNW", argLength: 2, reg: gp2flags, asm: "CMNW", typ: "Flags"},                    // arg0 compare to -arg1, 32 bit
                {name: "CMNWconst", argLength: 1, reg: gp1flags, asm: "CMNW", aux: "Int32", typ: "Flags"}, // arg0 compare to -auxInt, 32 bit
+               {name: "TST", argLength: 2, reg: gp2flags, asm: "TST", typ: "Flags"},                      // arg0 & arg1 compare to 0
+               {name: "TSTconst", argLength: 1, reg: gp1flags, asm: "TST", aux: "Int64", typ: "Flags"},   // arg0 & auxInt compare to 0
+               {name: "TSTW", argLength: 2, reg: gp2flags, asm: "TSTW", typ: "Flags"},                    // arg0 & arg1 compare to 0, 32 bit
+               {name: "TSTWconst", argLength: 1, reg: gp1flags, asm: "TSTW", aux: "Int32", typ: "Flags"}, // arg0 & auxInt compare to 0, 32 bit
                {name: "FCMPS", argLength: 2, reg: fp2flags, asm: "FCMPS", typ: "Flags"},                  // arg0 compare to arg1, float32
                {name: "FCMPD", argLength: 2, reg: fp2flags, asm: "FCMPD", typ: "Flags"},                  // arg0 compare to arg1, float64
 
index de04eacfa8e72488e6d4935c30b1851211ff36a9..9236080a011d8699c840caab91103fc957b79490 100644 (file)
@@ -1109,6 +1109,10 @@ const (
        OpARM64CMNconst
        OpARM64CMNW
        OpARM64CMNWconst
+       OpARM64TST
+       OpARM64TSTconst
+       OpARM64TSTW
+       OpARM64TSTWconst
        OpARM64FCMPS
        OpARM64FCMPD
        OpARM64ADDshiftLL
@@ -14480,6 +14484,50 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "TST",
+               argLen: 2,
+               asm:    arm64.ATST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+               },
+       },
+       {
+               name:    "TSTconst",
+               auxType: auxInt64,
+               argLen:  1,
+               asm:     arm64.ATST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+               },
+       },
+       {
+               name:   "TSTW",
+               argLen: 2,
+               asm:    arm64.ATSTW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+               },
+       },
+       {
+               name:    "TSTWconst",
+               auxType: auxInt32,
+               argLen:  1,
+               asm:     arm64.ATSTW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+               },
+       },
        {
                name:   "FCMPS",
                argLen: 2,
index 8317316f7ef3c6df395e5a9ad68578c24053c11b..dac8e1fbce557df8427cce75ce2eb2279b8e8ae5 100644 (file)
@@ -43,6 +43,12 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64BICshiftRA_0(v)
        case OpARM64BICshiftRL:
                return rewriteValueARM64_OpARM64BICshiftRL_0(v)
+       case OpARM64CMN:
+               return rewriteValueARM64_OpARM64CMN_0(v)
+       case OpARM64CMNWconst:
+               return rewriteValueARM64_OpARM64CMNWconst_0(v)
+       case OpARM64CMNconst:
+               return rewriteValueARM64_OpARM64CMNconst_0(v)
        case OpARM64CMP:
                return rewriteValueARM64_OpARM64CMP_0(v)
        case OpARM64CMPW:
@@ -257,6 +263,12 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64SUBshiftRA_0(v)
        case OpARM64SUBshiftRL:
                return rewriteValueARM64_OpARM64SUBshiftRL_0(v)
+       case OpARM64TST:
+               return rewriteValueARM64_OpARM64TST_0(v)
+       case OpARM64TSTWconst:
+               return rewriteValueARM64_OpARM64TSTWconst_0(v)
+       case OpARM64TSTconst:
+               return rewriteValueARM64_OpARM64TSTconst_0(v)
        case OpARM64UBFIZ:
                return rewriteValueARM64_OpARM64UBFIZ_0(v)
        case OpARM64UBFX:
@@ -2125,6 +2137,191 @@ func rewriteValueARM64_OpARM64BICshiftRL_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64CMN_0(v *Value) bool {
+       // match: (CMN x (MOVDconst [c]))
+       // cond:
+       // result: (CMNconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64CMNconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CMNWconst_0(v *Value) bool {
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)==int32(-y)
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) == int32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagEQ)
+               return true
+       }
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(-y) && uint32(x)<uint32(-y)
+       // result: (FlagLT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) < int32(-y) && uint32(x) < uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
+               return true
+       }
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)<int32(-y) && uint32(x)>uint32(-y)
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) < int32(-y) && uint32(x) > uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)>int32(-y) && uint32(x)<uint32(-y)
+       // result: (FlagGT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) > int32(-y) && uint32(x) < uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_ULT)
+               return true
+       }
+       // match: (CMNWconst (MOVDconst [x]) [y])
+       // cond: int32(x)>int32(-y) && uint32(x)>uint32(-y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x) > int32(-y) && uint32(x) > uint32(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64CMNconst_0(v *Value) bool {
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)==int64(-y)
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) == int64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagEQ)
+               return true
+       }
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)<int64(-y) && uint64(x)<uint64(-y)
+       // result: (FlagLT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) < int64(-y) && uint64(x) < uint64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_ULT)
+               return true
+       }
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)<int64(-y) && uint64(x)>uint64(-y)
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) < int64(-y) && uint64(x) > uint64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)>int64(-y) && uint64(x)<uint64(-y)
+       // result: (FlagGT_ULT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) > int64(-y) && uint64(x) < uint64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_ULT)
+               return true
+       }
+       // match: (CMNconst (MOVDconst [x]) [y])
+       // cond: int64(x)>int64(-y) && uint64(x)>uint64(-y)
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x) > int64(-y) && uint64(x) > uint64(-y)) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64CMP_0(v *Value) bool {
        b := v.Block
        _ = b
@@ -19487,6 +19684,127 @@ func rewriteValueARM64_OpARM64SUBshiftRL_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64TST_0(v *Value) bool {
+       // match: (TST x (MOVDconst [c]))
+       // cond:
+       // result: (TSTconst [c] x)
+       for {
+               _ = v.Args[1]
+               x := v.Args[0]
+               v_1 := v.Args[1]
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := v_1.AuxInt
+               v.reset(OpARM64TSTconst)
+               v.AuxInt = c
+               v.AddArg(x)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64TSTWconst_0(v *Value) bool {
+       // match: (TSTWconst (MOVDconst [x]) [y])
+       // cond: int32(x&y)==0
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x&y) == 0) {
+                       break
+               }
+               v.reset(OpARM64FlagEQ)
+               return true
+       }
+       // match: (TSTWconst (MOVDconst [x]) [y])
+       // cond: int32(x&y)<0
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x&y) < 0) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (TSTWconst (MOVDconst [x]) [y])
+       // cond: int32(x&y)>0
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int32(x&y) > 0) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64TSTconst_0(v *Value) bool {
+       // match: (TSTconst (MOVDconst [x]) [y])
+       // cond: int64(x&y)==0
+       // result: (FlagEQ)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x&y) == 0) {
+                       break
+               }
+               v.reset(OpARM64FlagEQ)
+               return true
+       }
+       // match: (TSTconst (MOVDconst [x]) [y])
+       // cond: int64(x&y)<0
+       // result: (FlagLT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x&y) < 0) {
+                       break
+               }
+               v.reset(OpARM64FlagLT_UGT)
+               return true
+       }
+       // match: (TSTconst (MOVDconst [x]) [y])
+       // cond: int64(x&y)>0
+       // result: (FlagGT_UGT)
+       for {
+               y := v.AuxInt
+               v_0 := v.Args[0]
+               if v_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               x := v_0.AuxInt
+               if !(int64(x&y) > 0) {
+                       break
+               }
+               v.reset(OpARM64FlagGT_UGT)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64UBFIZ_0(v *Value) bool {
        // match: (UBFIZ [bfc] (SLLconst [sc] x))
        // cond: sc < getARM64BFwidth(bfc)
@@ -26750,23 +27068,192 @@ func rewriteBlockARM64(b *Block) bool {
        _ = typ
        switch b.Kind {
        case BlockARM64EQ:
-               // match: (EQ (CMPconst [0] x) yes no)
-               // cond:
-               // result: (Z x yes no)
+               // match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (EQ (TSTWconst [c] y) yes no)
                for {
                        v := b.Control
-                       if v.Op != OpARM64CMPconst {
+                       if v.Op != OpARM64CMPWconst {
                                break
                        }
                        if v.AuxInt != 0 {
                                break
                        }
                        x := v.Args[0]
-                       b.Kind = BlockARM64Z
-                       b.SetControl(x)
-                       b.Aux = nil
-                       return true
-               }
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPconst [0] z:(AND x y)) yes no)
+               // cond: z.Uses == 1
+               // result: (EQ (TST x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64AND {
+                               break
+                       }
+                       _ = z.Args[1]
+                       x := z.Args[0]
+                       y := z.Args[1]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64TST, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPWconst [0] z:(AND x y)) yes no)
+               // cond: z.Uses == 1
+               // result: (EQ (TST x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64AND {
+                               break
+                       }
+                       _ = z.Args[1]
+                       x := z.Args[0]
+                       y := z.Args[1]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64TST, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (EQ (TSTconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPconst [0] z:(ADD x y)) yes no)
+               // cond: z.Uses == 1
+               // result: (EQ (CMN x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64ADD {
+                               break
+                       }
+                       _ = z.Args[1]
+                       x := z.Args[0]
+                       y := z.Args[1]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMP x z:(NEG y)) yes no)
+               // cond: z.Uses == 1
+               // result: (EQ (CMN x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMP {
+                               break
+                       }
+                       _ = v.Args[1]
+                       x := v.Args[0]
+                       z := v.Args[1]
+                       if z.Op != OpARM64NEG {
+                               break
+                       }
+                       y := z.Args[0]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64EQ
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (EQ (CMPconst [0] x) yes no)
+               // cond:
+               // result: (Z x yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       b.Kind = BlockARM64Z
+                       b.SetControl(x)
+                       b.Aux = nil
+                       return true
+               }
                // match: (EQ (CMPWconst [0] x) yes no)
                // cond:
                // result: (ZW x yes no)
@@ -26784,6 +27271,42 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (EQ (TSTconst [c] x) yes no)
+               // cond: oneBit(c)
+               // result: (TBZ {ntz(c)} x yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64TSTconst {
+                               break
+                       }
+                       c := v.AuxInt
+                       x := v.Args[0]
+                       if !(oneBit(c)) {
+                               break
+                       }
+                       b.Kind = BlockARM64TBZ
+                       b.SetControl(x)
+                       b.Aux = ntz(c)
+                       return true
+               }
+               // match: (EQ (TSTWconst [c] x) yes no)
+               // cond: oneBit(int64(uint32(c)))
+               // result: (TBZ {ntz(int64(uint32(c)))} x yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64TSTWconst {
+                               break
+                       }
+                       c := v.AuxInt
+                       x := v.Args[0]
+                       if !(oneBit(int64(uint32(c)))) {
+                               break
+                       }
+                       b.Kind = BlockARM64TBZ
+                       b.SetControl(x)
+                       b.Aux = ntz(int64(uint32(c)))
+                       return true
+               }
                // match: (EQ (FlagEQ) yes no)
                // cond:
                // result: (First nil yes no)
@@ -26868,6 +27391,62 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64GE:
+               // match: (GE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (GE (TSTWconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GE
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GE (CMPconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (GE (TSTconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GE
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (GE (CMPWconst [0] x) yes no)
                // cond:
                // result: (TBZ {int64(31)} x yes no)
@@ -26984,6 +27563,62 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64GT:
+               // match: (GT (CMPWconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (GT (TSTWconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GT
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (GT (CMPconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (GT (TSTconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64GT
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (GT (FlagEQ) yes no)
                // cond:
                // result: (First nil no yes)
@@ -27220,6 +27855,62 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64LE:
+               // match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (LE (TSTWconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LE
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LE (CMPconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (LE (TSTconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LE
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (LE (FlagEQ) yes no)
                // cond:
                // result: (First nil yes no)
@@ -27302,6 +27993,62 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64LT:
+               // match: (LT (CMPWconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (LT (TSTWconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LT
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (LT (CMPconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (LT (TSTconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64LT
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (LT (CMPWconst [0] x) yes no)
                // cond:
                // result: (TBNZ {int64(31)} x yes no)
@@ -27419,6 +28166,175 @@ func rewriteBlockARM64(b *Block) bool {
                        return true
                }
        case BlockARM64NE:
+               // match: (NE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (NE (TSTWconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTWconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPconst [0] z:(AND x y)) yes no)
+               // cond: z.Uses == 1
+               // result: (NE (TST x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64AND {
+                               break
+                       }
+                       _ = z.Args[1]
+                       x := z.Args[0]
+                       y := z.Args[1]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64TST, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPWconst [0] z:(AND x y)) yes no)
+               // cond: z.Uses == 1
+               // result: (NE (TST x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPWconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64AND {
+                               break
+                       }
+                       _ = z.Args[1]
+                       x := z.Args[0]
+                       y := z.Args[1]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64TST, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPconst [0] x:(ANDconst [c] y)) yes no)
+               // cond: x.Uses == 1
+               // result: (NE (TSTconst [c] y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       x := v.Args[0]
+                       if x.Op != OpARM64ANDconst {
+                               break
+                       }
+                       c := x.AuxInt
+                       y := x.Args[0]
+                       if !(x.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64TSTconst, types.TypeFlags)
+                       v0.AuxInt = c
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMPconst [0] z:(ADD x y)) yes no)
+               // cond: z.Uses == 1
+               // result: (NE (CMN x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMPconst {
+                               break
+                       }
+                       if v.AuxInt != 0 {
+                               break
+                       }
+                       z := v.Args[0]
+                       if z.Op != OpARM64ADD {
+                               break
+                       }
+                       _ = z.Args[1]
+                       x := z.Args[0]
+                       y := z.Args[1]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
+               // match: (NE (CMP x z:(NEG y)) yes no)
+               // cond: z.Uses == 1
+               // result: (NE (CMN x y) yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64CMP {
+                               break
+                       }
+                       _ = v.Args[1]
+                       x := v.Args[0]
+                       z := v.Args[1]
+                       if z.Op != OpARM64NEG {
+                               break
+                       }
+                       y := z.Args[0]
+                       if !(z.Uses == 1) {
+                               break
+                       }
+                       b.Kind = BlockARM64NE
+                       v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+                       v0.AddArg(x)
+                       v0.AddArg(y)
+                       b.SetControl(v0)
+                       b.Aux = nil
+                       return true
+               }
                // match: (NE (CMPconst [0] x) yes no)
                // cond:
                // result: (NZ x yes no)
@@ -27453,6 +28369,42 @@ func rewriteBlockARM64(b *Block) bool {
                        b.Aux = nil
                        return true
                }
+               // match: (NE (TSTconst [c] x) yes no)
+               // cond: oneBit(c)
+               // result: (TBNZ {ntz(c)} x yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64TSTconst {
+                               break
+                       }
+                       c := v.AuxInt
+                       x := v.Args[0]
+                       if !(oneBit(c)) {
+                               break
+                       }
+                       b.Kind = BlockARM64TBNZ
+                       b.SetControl(x)
+                       b.Aux = ntz(c)
+                       return true
+               }
+               // match: (NE (TSTWconst [c] x) yes no)
+               // cond: oneBit(int64(uint32(c)))
+               // result: (TBNZ {ntz(int64(uint32(c)))} x yes no)
+               for {
+                       v := b.Control
+                       if v.Op != OpARM64TSTWconst {
+                               break
+                       }
+                       c := v.AuxInt
+                       x := v.Args[0]
+                       if !(oneBit(int64(uint32(c)))) {
+                               break
+                       }
+                       b.Kind = BlockARM64TBNZ
+                       b.SetControl(x)
+                       b.Aux = ntz(int64(uint32(c)))
+                       return true
+               }
                // match: (NE (FlagEQ) yes no)
                // cond:
                // result: (First nil no yes)
index 0b5cc3d8911e11717ddc5e4f09e54a91d449b140..2b5ee28ddb3a43b3a167befd33be8d1a47f1a4db 100644 (file)
@@ -213,26 +213,32 @@ var optab = []Optab{
        {AAND, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
        {AANDS, C_REG, C_REG, C_REG, 1, 4, 0, 0, 0},
        {AANDS, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+       {ATST, C_REG, C_REG, C_NONE, 1, 4, 0, 0, 0},
        {AAND, C_MBCON, C_REG, C_RSP, 53, 4, 0, 0, 0},
        {AAND, C_MBCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
        {AANDS, C_MBCON, C_REG, C_REG, 53, 4, 0, 0, 0},
        {AANDS, C_MBCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
+       {ATST, C_MBCON, C_REG, C_NONE, 53, 4, 0, 0, 0},
        {AAND, C_BITCON, C_REG, C_RSP, 53, 4, 0, 0, 0},
        {AAND, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
        {AANDS, C_BITCON, C_REG, C_REG, 53, 4, 0, 0, 0},
        {AANDS, C_BITCON, C_NONE, C_REG, 53, 4, 0, 0, 0},
+       {ATST, C_BITCON, C_REG, C_NONE, 53, 4, 0, 0, 0},
        {AAND, C_MOVCON, C_REG, C_RSP, 62, 8, 0, 0, 0},
        {AAND, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0},
        {AANDS, C_MOVCON, C_REG, C_REG, 62, 8, 0, 0, 0},
        {AANDS, C_MOVCON, C_NONE, C_REG, 62, 8, 0, 0, 0},
+       {ATST, C_MOVCON, C_REG, C_NONE, 62, 8, 0, 0, 0},
        {AAND, C_VCON, C_REG, C_RSP, 28, 8, 0, LFROM, 0},
        {AAND, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
        {AANDS, C_VCON, C_REG, C_REG, 28, 8, 0, LFROM, 0},
        {AANDS, C_VCON, C_NONE, C_REG, 28, 8, 0, LFROM, 0},
+       {ATST, C_VCON, C_REG, C_NONE, 28, 8, 0, LFROM, 0},
        {AAND, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
        {AAND, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
        {AANDS, C_SHIFT, C_REG, C_REG, 3, 4, 0, 0, 0},
        {AANDS, C_SHIFT, C_NONE, C_REG, 3, 4, 0, 0, 0},
+       {ATST, C_SHIFT, C_REG, C_NONE, 3, 4, 0, 0, 0},
        {AMOVD, C_RSP, C_NONE, C_RSP, 24, 4, 0, 0, 0},
        {AMVN, C_REG, C_NONE, C_REG, 24, 4, 0, 0, 0},
        {AMOVB, C_REG, C_NONE, C_REG, 45, 4, 0, 0, 0},
@@ -2980,14 +2986,18 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                if !(o1 != 0) {
                        break
                }
+               rt := int(p.To.Reg)
+               if p.To.Type == obj.TYPE_NONE {
+                       rt = REGZERO
+               }
                r := int(p.Reg)
                if r == 0 {
-                       r = int(p.To.Reg)
+                       r = rt
                }
                o2 = c.oprrr(p, p.As)
                o2 |= REGTMP & 31 << 16 /* shift is 0 */
                o2 |= uint32(r&31) << 5
-               o2 |= uint32(p.To.Reg & 31)
+               o2 |= uint32(rt & 31)
 
        case 29: /* op Rn, Rd */
                fc := c.aclass(&p.From)
@@ -3378,9 +3388,12 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 
                o1 |= uint32((p.From.Offset & 0x7F) << 5)
 
-       case 53: /* and/or/eor/bic/... $bitcon, Rn, Rd */
+       case 53: /* and/or/eor/bic/tst/... $bitcon, Rn, Rd */
                a := p.As
                rt := int(p.To.Reg)
+               if p.To.Type == obj.TYPE_NONE {
+                       rt = REGZERO
+               }
                r := int(p.Reg)
                if r == 0 {
                        r = rt
@@ -3388,7 +3401,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                mode := 64
                v := uint64(p.From.Offset)
                switch p.As {
-               case AANDW, AORRW, AEORW, AANDSW:
+               case AANDW, AORRW, AEORW, AANDSW, ATSTW:
                        mode = 32
                case ABIC, AORN, AEON, ABICS:
                        v = ^v
@@ -4001,7 +4014,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                        size = 1
                }
 
-               o1 |= (Q&1) << 30 | (size&3) << 22 | uint32(rf&31) << 5 | uint32(rt&31)
+               o1 |= (Q&1)<<30 | (size&3)<<22 | uint32(rf&31)<<5 | uint32(rt&31)
 
        case 84: /* vst1 [Vt1.<T>, Vt2.<T>, ...], (Rn) */
                r := int(p.To.Reg)
@@ -4587,10 +4600,10 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
        case AEORW:
                return S32 | 2<<29 | 0xA<<24
 
-       case AANDS:
+       case AANDS, ATST:
                return S64 | 3<<29 | 0xA<<24
 
-       case AANDSW:
+       case AANDSW, ATSTW:
                return S32 | 3<<29 | 0xA<<24
 
        case ABIC:
@@ -5165,10 +5178,10 @@ func (c *ctxt7) opirr(p *obj.Prog, a obj.As) uint32 {
        case AEORW, AEONW:
                return S32 | 2<<29 | 0x24<<23 | 0<<22
 
-       case AANDS, ABICS:
+       case AANDS, ABICS, ATST:
                return S64 | 3<<29 | 0x24<<23
 
-       case AANDSW, ABICSW:
+       case AANDSW, ABICSW, ATSTW:
                return S32 | 3<<29 | 0x24<<23 | 0<<22
 
        case AASR:
index 9c3d22050e659ab850efdc25f9dc97f7e9056a6a..fe33b91820af8be6ab79506e02771c24e91c22ee 100644 (file)
@@ -311,7 +311,7 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
        // will zero the high 32-bit of the destination
        // register anyway.
        switch p.As {
-       case AANDW, AORRW, AEORW, AANDSW:
+       case AANDW, AORRW, AEORW, AANDSW, ATSTW:
                if p.From.Type == obj.TYPE_CONST {
                        v := p.From.Offset & 0xffffffff
                        p.From.Offset = v | v<<32