]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: intrinsify math/bits.Sub64 for arm64
author erifan01 <eric.fang@arm.com>
Wed, 20 Mar 2019 12:46:20 +0000 (12:46 +0000)
committer Cherry Zhang <cherryyz@google.com>
Mon, 22 Apr 2019 14:40:20 +0000 (14:40 +0000)
This CL intrinsifies Sub64 with the arm64 instruction sequence NEGS, SBCS,
NGC and NEG, and optimizes the case of borrowing chains.

Benchmarks:
name              old time/op       new time/op       delta
Sub-64            2.500000ns +- 0%  2.048000ns +- 1%  -18.08%  (p=0.000 n=10+10)
Sub32-64          2.500000ns +- 0%  2.500000ns +- 0%     ~     (all equal)
Sub64-64          2.500000ns +- 0%  2.080000ns +- 0%  -16.80%  (p=0.000 n=10+7)
Sub64multiple-64  7.090000ns +- 0%  2.090000ns +- 0%  -70.52%  (p=0.000 n=10+10)

Change-Id: I3d2664e009a9635e13b55d2c4567c7b34c2c0655
Reviewed-on: https://go-review.googlesource.com/c/go/+/159018
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/gc/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/mathbits.go

index be4ddb4b6b67b69014053517519d49384bf3c450..d3fc89d400e4f8c7f9bc86a7d22e061c73fd7751 100644 (file)
@@ -260,7 +260,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.Reg = arm64.REGZERO
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
-       case ssa.OpARM64ADCSflags, ssa.OpARM64ADDSflags:
+       case ssa.OpARM64ADCSflags,
+               ssa.OpARM64ADDSflags,
+               ssa.OpARM64SBCSflags,
+               ssa.OpARM64SUBSflags:
                r := v.Reg0()
                r1 := v.Args[0].Reg()
                r2 := v.Args[1].Reg()
@@ -270,6 +273,18 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
                p.Reg = r1
                p.To.Type = obj.TYPE_REG
                p.To.Reg = r
+       case ssa.OpARM64NEGSflags:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = v.Args[0].Reg()
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg0()
+       case ssa.OpARM64NGCzerocarry:
+               p := s.Prog(v.Op.Asm())
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = arm64.REGZERO
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = v.Reg()
        case ssa.OpARM64EXTRconst,
                ssa.OpARM64EXTRWconst:
                p := s.Prog(v.Op.Asm())
index 8159dc7bcad74f9716ed23277305cad381203666..930779045ab9d18895326f61fa58e769a5f2c77f 100644 (file)
@@ -3579,8 +3579,8 @@ func init() {
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
                },
-               sys.AMD64)
-       alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64)
+               sys.AMD64, sys.ARM64)
+       alias("math/bits", "Sub", "math/bits", "Sub64", sys.ArchAMD64, sys.ArchARM64)
        addF("math/bits", "Div64",
                func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
                        // check for divide-by-zero/overflow and panic with appropriate message
index 70b1681c63635e206a5a7e73b82c2838eefd000a..de7ab3861d98c85326b5e45018cd9521c0afb862 100644 (file)
 (Select0 (Add64carry x y c)) -> (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))
 (Select1 (Add64carry x y c)) -> (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))))
 
+// 64-bit subtraction with borrowing.
+(Select0 (Sub64borrow x y bo)) -> (Select0 <typ.UInt64> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))
+(Select1 (Sub64borrow x y bo)) -> (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))))
+
 // boolean ops -- booleans are represented with 0=false, 1=true
 (AndB x y) -> (AND x y)
 (OrB x y) -> (OR x y)
 (ADD a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MSUBW a x y)
 (SUB a l:(MNEGW x y)) && a.Type.Size() != 8 && l.Uses==1 && clobber(l) -> (MADDW a x y)
 
-// optimize ADCSflags and friends
+// optimize ADCSflags, SBCSflags and friends
 (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c)))) -> (ADCSflags x y c)
 (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (MOVDconst [0])))) -> (ADDSflags x y)
+(SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> bo))))) -> (SBCSflags x y bo)
+(SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (MOVDconst [0])))) -> (SUBSflags x y)
 
 // mul by constant
 (MUL x (MOVDconst [-1])) -> (NEG x)
index a885a8f467c9609e1a1fe221596539f33e45cf25..ece53eb750ed9707e024a45616d2243a60a1071f 100644 (file)
@@ -183,6 +183,8 @@ func init() {
                {name: "ADDSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "ADDS", commutative: true},      // arg0+arg1, set flags.
                {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                                                            // arg0 - arg1
                {name: "SUBconst", argLength: 1, reg: gp11, asm: "SUB", aux: "Int64"},                                         // arg0 - auxInt
+               {name: "SBCSflags", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "SBCS"},                    // arg0-(arg1+borrowing), set flags.
+               {name: "SUBSflags", argLength: 2, reg: gp21flags, typ: "(UInt64,Flags)", asm: "SUBS"},                         // arg0 - arg1, set flags.
                {name: "MUL", argLength: 2, reg: gp21, asm: "MUL", commutative: true},                                         // arg0 * arg1
                {name: "MULW", argLength: 2, reg: gp21, asm: "MULW", commutative: true},                                       // arg0 * arg1, 32-bit
                {name: "MNEG", argLength: 2, reg: gp21, asm: "MNEG", commutative: true},                                       // -arg0 * arg1
@@ -224,21 +226,23 @@ func init() {
                {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
 
                // unary ops
-               {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"},         // ^arg0
-               {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},         // -arg0
-               {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"},     // abs(arg0), float64
-               {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"},     // -arg0, float32
-               {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"},     // -arg0, float64
-               {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"},   // sqrt(arg0), float64
-               {name: "REV", argLength: 1, reg: gp11, asm: "REV"},         // byte reverse, 64-bit
-               {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"},       // byte reverse, 32-bit
-               {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"},   // byte reverse in each 16-bit halfword, 32-bit
-               {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"},       // bit reverse, 64-bit
-               {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"},     // bit reverse, 32-bit
-               {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"},         // count leading zero, 64-bit
-               {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},       // count leading zero, 32-bit
-               {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"},       // count set bits for each 8-bit unit and store the result in each 8-bit unit
-               {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"}, // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
+               {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"},                                    // ^arg0
+               {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"},                                    // -arg0
+               {name: "NEGSflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "NEGS"}, // -arg0, set flags.
+               {name: "NGCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "NGC"},       // -1 if borrowing, 0 otherwise.
+               {name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD"},                                // abs(arg0), float64
+               {name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"},                                // -arg0, float32
+               {name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"},                                // -arg0, float64
+               {name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"},                              // sqrt(arg0), float64
+               {name: "REV", argLength: 1, reg: gp11, asm: "REV"},                                    // byte reverse, 64-bit
+               {name: "REVW", argLength: 1, reg: gp11, asm: "REVW"},                                  // byte reverse, 32-bit
+               {name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"},                              // byte reverse in each 16-bit halfword, 32-bit
+               {name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"},                                  // bit reverse, 64-bit
+               {name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"},                                // bit reverse, 32-bit
+               {name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"},                                    // count leading zero, 64-bit
+               {name: "CLZW", argLength: 1, reg: gp11, asm: "CLZW"},                                  // count leading zero, 32-bit
+               {name: "VCNT", argLength: 1, reg: fp11, asm: "VCNT"},                                  // count set bits for each 8-bit unit and store the result in each 8-bit unit
+               {name: "VUADDLV", argLength: 1, reg: fp11, asm: "VUADDLV"},                            // unsigned sum of eight bytes in a 64-bit value, zero extended to 64-bit.
                {name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
                {name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true, zeroWidth: true},
 
index 06dcb2d7ac5573b63f0ac7b152023e183a566aca..1af77c88de62c5c12f6f7d8a7274803200b1aa49 100644 (file)
@@ -1149,6 +1149,8 @@ const (
        OpARM64ADDSflags
        OpARM64SUB
        OpARM64SUBconst
+       OpARM64SBCSflags
+       OpARM64SUBSflags
        OpARM64MUL
        OpARM64MULW
        OpARM64MNEG
@@ -1187,6 +1189,8 @@ const (
        OpARM64LoweredMuluhilo
        OpARM64MVN
        OpARM64NEG
+       OpARM64NEGSflags
+       OpARM64NGCzerocarry
        OpARM64FABSD
        OpARM64FNEGS
        OpARM64FNEGD
@@ -15260,6 +15264,36 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "SBCSflags",
+               argLen: 3,
+               asm:    arm64.ASBCS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                               {1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "SUBSflags",
+               argLen: 2,
+               asm:    arm64.ASUBS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                               {1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:        "MUL",
                argLen:      2,
@@ -15808,6 +15842,30 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "NEGSflags",
+               argLen: 1,
+               asm:    arm64.ANEGS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                       },
+                       outputs: []outputInfo{
+                               {1, 0},
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
+       {
+               name:   "NGCzerocarry",
+               argLen: 1,
+               asm:    arm64.ANGC,
+               reg: regInfo{
+                       outputs: []outputInfo{
+                               {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+                       },
+               },
+       },
        {
                name:   "FABSD",
                argLen: 1,
index 0f55a6f7d80a5c1554e1d05e51b5ac9139cdefe0..bc7f17dfb35fa3e2e713bfc1e1367b60884d5f47 100644 (file)
@@ -319,6 +319,8 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64RORWconst_0(v)
        case OpARM64RORconst:
                return rewriteValueARM64_OpARM64RORconst_0(v)
+       case OpARM64SBCSflags:
+               return rewriteValueARM64_OpARM64SBCSflags_0(v)
        case OpARM64SLL:
                return rewriteValueARM64_OpARM64SLL_0(v)
        case OpARM64SLLconst:
@@ -28509,6 +28511,80 @@ func rewriteValueARM64_OpARM64RORconst_0(v *Value) bool {
        }
        return false
 }
+func rewriteValueARM64_OpARM64SBCSflags_0(v *Value) bool {
+       b := v.Block
+       typ := &b.Func.Config.Types
+       // match: (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> bo)))))
+       // cond:
+       // result: (SBCSflags x y bo)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpSelect1 {
+                       break
+               }
+               if v_2.Type != types.TypeFlags {
+                       break
+               }
+               v_2_0 := v_2.Args[0]
+               if v_2_0.Op != OpARM64NEGSflags {
+                       break
+               }
+               v_2_0_0 := v_2_0.Args[0]
+               if v_2_0_0.Op != OpARM64NEG {
+                       break
+               }
+               if v_2_0_0.Type != typ.UInt64 {
+                       break
+               }
+               v_2_0_0_0 := v_2_0_0.Args[0]
+               if v_2_0_0_0.Op != OpARM64NGCzerocarry {
+                       break
+               }
+               if v_2_0_0_0.Type != typ.UInt64 {
+                       break
+               }
+               bo := v_2_0_0_0.Args[0]
+               v.reset(OpARM64SBCSflags)
+               v.AddArg(x)
+               v.AddArg(y)
+               v.AddArg(bo)
+               return true
+       }
+       // match: (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags (MOVDconst [0]))))
+       // cond:
+       // result: (SUBSflags x y)
+       for {
+               _ = v.Args[2]
+               x := v.Args[0]
+               y := v.Args[1]
+               v_2 := v.Args[2]
+               if v_2.Op != OpSelect1 {
+                       break
+               }
+               if v_2.Type != types.TypeFlags {
+                       break
+               }
+               v_2_0 := v_2.Args[0]
+               if v_2_0.Op != OpARM64NEGSflags {
+                       break
+               }
+               v_2_0_0 := v_2_0.Args[0]
+               if v_2_0_0.Op != OpARM64MOVDconst {
+                       break
+               }
+               if v_2_0_0.AuxInt != 0 {
+                       break
+               }
+               v.reset(OpARM64SUBSflags)
+               v.AddArg(x)
+               v.AddArg(y)
+               return true
+       }
+       return false
+}
 func rewriteValueARM64_OpARM64SLL_0(v *Value) bool {
        // match: (SLL x (MOVDconst [c]))
        // cond:
@@ -36898,6 +36974,30 @@ func rewriteValueARM64_OpSelect0_0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Select0 (Sub64borrow x y bo))
+       // cond:
+       // result: (Select0 <typ.UInt64> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               bo := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpSelect0)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpARM64SBCSflags, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v0.AddArg(x)
+               v0.AddArg(y)
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpARM64NEGSflags, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v2.AddArg(bo)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpSelect1_0(v *Value) bool {
@@ -36930,6 +37030,34 @@ func rewriteValueARM64_OpSelect1_0(v *Value) bool {
                v.AddArg(v0)
                return true
        }
+       // match: (Select1 (Sub64borrow x y bo))
+       // cond:
+       // result: (NEG <typ.UInt64> (NGCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (SBCSflags x y (Select1 <types.TypeFlags> (NEGSflags bo))))))
+       for {
+               v_0 := v.Args[0]
+               if v_0.Op != OpSub64borrow {
+                       break
+               }
+               bo := v_0.Args[2]
+               x := v_0.Args[0]
+               y := v_0.Args[1]
+               v.reset(OpARM64NEG)
+               v.Type = typ.UInt64
+               v0 := b.NewValue0(v.Pos, OpARM64NGCzerocarry, typ.UInt64)
+               v1 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v2 := b.NewValue0(v.Pos, OpARM64SBCSflags, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v2.AddArg(x)
+               v2.AddArg(y)
+               v3 := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
+               v4 := b.NewValue0(v.Pos, OpARM64NEGSflags, types.NewTuple(typ.UInt64, types.TypeFlags))
+               v4.AddArg(bo)
+               v3.AddArg(v4)
+               v2.AddArg(v3)
+               v1.AddArg(v2)
+               v0.AddArg(v1)
+               v.AddArg(v0)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpSignExt16to32_0(v *Value) bool {
index 6676c69188027c75a661cf84317e8a940c7e26c6..70874590fe97b1ea4cebfe3e52b88ba00cc68b3b 100644 (file)
@@ -446,21 +446,25 @@ func Add64M(p, q, r *[3]uint64) {
 
 func Sub(x, y, ci uint) (r, co uint) {
        // amd64:"NEGL","SBBQ","NEGQ"
+       // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
        return bits.Sub(x, y, ci)
 }
 
 func SubC(x, ci uint) (r, co uint) {
        // amd64:"NEGL","SBBQ","NEGQ"
+       // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
        return bits.Sub(x, 7, ci)
 }
 
 func SubZ(x, y uint) (r, co uint) {
        // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+       // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
        return bits.Sub(x, y, 0)
 }
 
 func SubR(x, y, ci uint) uint {
        // amd64:"NEGL","SBBQ",-"NEGQ"
+       // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
        r, _ := bits.Sub(x, y, ci)
        return r
 }
@@ -468,27 +472,32 @@ func SubM(p, q, r *[3]uint) {
        var c uint
        r[0], c = bits.Sub(p[0], q[0], c)
        // amd64:"SBBQ",-"NEGL",-"NEGQ"
+       // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
        r[1], c = bits.Sub(p[1], q[1], c)
        r[2], c = bits.Sub(p[2], q[2], c)
 }
 
 func Sub64(x, y, ci uint64) (r, co uint64) {
        // amd64:"NEGL","SBBQ","NEGQ"
+       // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
        return bits.Sub64(x, y, ci)
 }
 
 func Sub64C(x, ci uint64) (r, co uint64) {
        // amd64:"NEGL","SBBQ","NEGQ"
+       // arm64:"NEGS","SBCS","NGC","NEG",-"ADD",-"SUB",-"CMP"
        return bits.Sub64(x, 7, ci)
 }
 
 func Sub64Z(x, y uint64) (r, co uint64) {
        // amd64:"SUBQ","SBBQ","NEGQ",-"NEGL"
+       // arm64:"SUBS","NGC","NEG",-"SBCS",-"ADD",-"SUB\t",-"CMP"
        return bits.Sub64(x, y, 0)
 }
 
 func Sub64R(x, y, ci uint64) uint64 {
        // amd64:"NEGL","SBBQ",-"NEGQ"
+       // arm64:"NEGS","SBCS",-"NGC",-"NEG\t",-"ADD",-"SUB",-"CMP"
        r, _ := bits.Sub64(x, y, ci)
        return r
 }
@@ -496,6 +505,7 @@ func Sub64M(p, q, r *[3]uint64) {
        var c uint64
        r[0], c = bits.Sub64(p[0], q[0], c)
        // amd64:"SBBQ",-"NEGL",-"NEGQ"
+       // arm64:"SBCS",-"NEGS",-"NGC",-"NEG",-"ADD",-"SUB",-"CMP"
        r[1], c = bits.Sub64(p[1], q[1], c)
        r[2], c = bits.Sub64(p[2], q[2], c)
 }