Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: ARM64 optimize []float64 and []float32 access
author Egon Elbre <egonelbre@gmail.com>
Fri, 27 Nov 2020 15:10:33 +0000 (17:10 +0200)
committer Cherry Zhang <cherryyz@google.com>
Wed, 24 Feb 2021 19:49:08 +0000 (19:49 +0000)
Optimize load and store to []float64 and []float32.
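Previously the compiler emitted a separate LSL to scale the index, followed by
an unscaled register-indexed load/store, instead of a single shifted-register
indexed load/store.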

Before:

    LSL   $3, R0, R0
    FMOVD F0, (R1)(R0)

After:

    FMOVD F0, (R1)(R0<<3)

Fixes #42798

Change-Id: I0c0912140c3dce5aa6abc27097c0eb93833cc589
Reviewed-on: https://go-review.googlesource.com/c/go/+/273706
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Giovanni Bajo <rasky@develer.com>

src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
test/codegen/floats.go
test/codegen/memops.go

index 73e74e12199b5ecdf0be97d91dc85a5c05c9b37f..ca5eac72bfed84e7ce9e07c4f475d5b41f3aad8a 100644 (file)
@@ -100,9 +100,11 @@ func genIndexedOperand(v *ssa.Value) obj.Addr {
        // Reg: base register, Index: (shifted) index register
        mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
        switch v.Op {
-       case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
+       case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
+               ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
                mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
-       case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
+       case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
+               ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
                mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
        case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
                mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
@@ -435,7 +437,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpARM64MOVHUloadidx2,
                ssa.OpARM64MOVWloadidx4,
                ssa.OpARM64MOVWUloadidx4,
-               ssa.OpARM64MOVDloadidx8:
+               ssa.OpARM64MOVDloadidx8,
+               ssa.OpARM64FMOVDloadidx8,
+               ssa.OpARM64FMOVSloadidx4:
                p := s.Prog(v.Op.Asm())
                p.From = genIndexedOperand(v)
                p.To.Type = obj.TYPE_REG
@@ -472,7 +476,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpARM64FMOVDstoreidx,
                ssa.OpARM64MOVHstoreidx2,
                ssa.OpARM64MOVWstoreidx4,
-               ssa.OpARM64MOVDstoreidx8:
+               ssa.OpARM64FMOVSstoreidx4,
+               ssa.OpARM64MOVDstoreidx8,
+               ssa.OpARM64FMOVDstoreidx8:
                p := s.Prog(v.Op.Asm())
                p.To = genIndexedOperand(v)
                p.From.Type = obj.TYPE_REG
index 4531c38a7aa0c0e40c9d142dcd0690f9fca5012d..98503748dbaa810aabf63e173d15dabaebc673ed 100644 (file)
 (MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem)
 (MOVHloadidx2  ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload  [int32(c)<<1] ptr mem)
 
+(FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem)
+(FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem)
+(FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem)
+(FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem)
+(FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem)
+(FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem)
+(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
+(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)
+
 (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
        (MOVBstore [off1+int32(off2)] {sym} ptr val mem)
 (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
 (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)
 
+(FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem)
+(FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem)
+(FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem)
+(FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem)
+(FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem)
+(FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem)
+(FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem)
+(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)
+
 (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
        && (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
index b0bc9c78ff6ca2dc8c42a4209b8844254062226d..e826e75252a79ea7aaf0bce5ae2fc0a99a7d24c0 100644 (file)
@@ -379,11 +379,13 @@ func init() {
                {name: "FMOVDloadidx", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1, arg2=mem.
 
                // shifted register indexed load
-               {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"},    // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
-               {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
-               {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"},    // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
-               {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
-               {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"},   // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
+               {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"},     // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"},  // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"},     // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
+               {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"},  // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
+               {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"},    // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
+               {name: "FMOVSloadidx4", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1*4, arg2 = mem.
+               {name: "FMOVDloadidx8", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1*8, arg2 = mem.
 
                {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 1 byte of arg1 to arg0 + auxInt + aux.  arg2=mem.
                {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},   // store 2 bytes of arg1 to arg0 + auxInt + aux.  arg2=mem.
@@ -402,9 +404,11 @@ func init() {
                {name: "FMOVDstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1, arg3=mem.
 
                // shifted register indexed store
-               {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
-               {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
-               {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
+               {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"},   // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
+               {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"},   // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
+               {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"},   // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
+               {name: "FMOVSstoreidx4", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem.
+               {name: "FMOVDstoreidx8", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem.
 
                {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux.  arg1=mem.
                {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux.  arg1=mem.
index ba170968aeae869b6710ea74004738f885c7f914..551aa725b6d869b5bb400fa3e4219f35ad004c2b 100644 (file)
@@ -1481,6 +1481,8 @@ const (
        OpARM64MOVWloadidx4
        OpARM64MOVWUloadidx4
        OpARM64MOVDloadidx8
+       OpARM64FMOVSloadidx4
+       OpARM64FMOVDloadidx8
        OpARM64MOVBstore
        OpARM64MOVHstore
        OpARM64MOVWstore
@@ -1497,6 +1499,8 @@ const (
        OpARM64MOVHstoreidx2
        OpARM64MOVWstoreidx4
        OpARM64MOVDstoreidx8
+       OpARM64FMOVSstoreidx4
+       OpARM64FMOVDstoreidx8
        OpARM64MOVBstorezero
        OpARM64MOVHstorezero
        OpARM64MOVWstorezero
@@ -19787,6 +19791,34 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FMOVSloadidx4",
+               argLen: 3,
+               asm:    arm64.AFMOVS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
+       {
+               name:   "FMOVDloadidx8",
+               argLen: 3,
+               asm:    arm64.AFMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                       },
+                       outputs: []outputInfo{
+                               {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:           "MOVBstore",
                auxType:        auxSymOff,
@@ -19994,6 +20026,30 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "FMOVSstoreidx4",
+               argLen: 4,
+               asm:    arm64.AFMOVS,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                               {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
+       {
+               name:   "FMOVDstoreidx8",
+               argLen: 4,
+               asm:    arm64.AFMOVD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+                               {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+                               {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+                       },
+               },
+       },
        {
                name:           "MOVBstorezero",
                auxType:        auxSymOff,
index ba146c7043174e80531ca73cdc42133ee0495a1a..ece834f9964765acdd1dccf10977772d0eedc6c3 100644 (file)
@@ -99,18 +99,26 @@ func rewriteValueARM64(v *Value) bool {
                return rewriteValueARM64_OpARM64FMOVDload(v)
        case OpARM64FMOVDloadidx:
                return rewriteValueARM64_OpARM64FMOVDloadidx(v)
+       case OpARM64FMOVDloadidx8:
+               return rewriteValueARM64_OpARM64FMOVDloadidx8(v)
        case OpARM64FMOVDstore:
                return rewriteValueARM64_OpARM64FMOVDstore(v)
        case OpARM64FMOVDstoreidx:
                return rewriteValueARM64_OpARM64FMOVDstoreidx(v)
+       case OpARM64FMOVDstoreidx8:
+               return rewriteValueARM64_OpARM64FMOVDstoreidx8(v)
        case OpARM64FMOVSload:
                return rewriteValueARM64_OpARM64FMOVSload(v)
        case OpARM64FMOVSloadidx:
                return rewriteValueARM64_OpARM64FMOVSloadidx(v)
+       case OpARM64FMOVSloadidx4:
+               return rewriteValueARM64_OpARM64FMOVSloadidx4(v)
        case OpARM64FMOVSstore:
                return rewriteValueARM64_OpARM64FMOVSstore(v)
        case OpARM64FMOVSstoreidx:
                return rewriteValueARM64_OpARM64FMOVSstoreidx(v)
+       case OpARM64FMOVSstoreidx4:
+               return rewriteValueARM64_OpARM64FMOVSstoreidx4(v)
        case OpARM64FMULD:
                return rewriteValueARM64_OpARM64FMULD(v)
        case OpARM64FMULS:
@@ -3900,6 +3908,25 @@ func rewriteValueARM64_OpARM64FMOVDload(v *Value) bool {
                v.AddArg3(ptr, idx, mem)
                return true
        }
+       // match: (FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVDloadidx8 ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
+                       break
+               }
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64FMOVDloadidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
        // match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@@ -3964,6 +3991,56 @@ func rewriteValueARM64_OpARM64FMOVDloadidx(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
+       // match: (FMOVDloadidx ptr (SLLconst [3] idx) mem)
+       // result: (FMOVDloadidx8 ptr idx mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64FMOVDloadidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (FMOVDloadidx (SLLconst [3] idx) ptr mem)
+       // result: (FMOVDloadidx8 ptr idx mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64FMOVDloadidx8)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDloadidx8(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (FMOVDloadidx8 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<3)
+       // result: (FMOVDload ptr [int32(c)<<3] mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 3)) {
+                       break
+               }
+               v.reset(OpARM64FMOVDload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 3)
+               v.AddArg2(ptr, mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool {
@@ -4031,6 +4108,26 @@ func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool {
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
+       // match: (FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVDstoreidx8 ptr idx val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
+                       break
+               }
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64FMOVDstoreidx8)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
        // match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
@@ -4099,6 +4196,60 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx(v *Value) bool {
                v.AddArg3(idx, val, mem)
                return true
        }
+       // match: (FMOVDstoreidx ptr (SLLconst [3] idx) val mem)
+       // result: (FMOVDstoreidx8 ptr idx val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64FMOVDstoreidx8)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (FMOVDstoreidx (SLLconst [3] idx) ptr val mem)
+       // result: (FMOVDstoreidx8 ptr idx val mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v_1
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64FMOVDstoreidx8)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (FMOVDstoreidx8 ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c<<3)
+       // result: (FMOVDstore [int32(c)<<3] ptr val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c << 3)) {
+                       break
+               }
+               v.reset(OpARM64FMOVDstore)
+               v.AuxInt = int32ToAuxInt(int32(c) << 3)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
@@ -4163,6 +4314,25 @@ func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
                v.AddArg3(ptr, idx, mem)
                return true
        }
+       // match: (FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVSloadidx4 ptr idx mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
+                       break
+               }
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               mem := v_1
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64FMOVSloadidx4)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
        // match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
@@ -4227,6 +4397,56 @@ func rewriteValueARM64_OpARM64FMOVSloadidx(v *Value) bool {
                v.AddArg2(ptr, mem)
                return true
        }
+       // match: (FMOVSloadidx ptr (SLLconst [2] idx) mem)
+       // result: (FMOVSloadidx4 ptr idx mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               mem := v_2
+               v.reset(OpARM64FMOVSloadidx4)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       // match: (FMOVSloadidx (SLLconst [2] idx) ptr mem)
+       // result: (FMOVSloadidx4 ptr idx mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v_1
+               mem := v_2
+               v.reset(OpARM64FMOVSloadidx4)
+               v.AddArg3(ptr, idx, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVSloadidx4(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (FMOVSloadidx4 ptr (MOVDconst [c]) mem)
+       // cond: is32Bit(c<<2)
+       // result: (FMOVSload ptr [int32(c)<<2] mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               mem := v_2
+               if !(is32Bit(c << 2)) {
+                       break
+               }
+               v.reset(OpARM64FMOVSload)
+               v.AuxInt = int32ToAuxInt(int32(c) << 2)
+               v.AddArg2(ptr, mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool {
@@ -4294,6 +4514,26 @@ func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool {
                v.AddArg4(ptr, idx, val, mem)
                return true
        }
+       // match: (FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
+       // cond: off == 0 && sym == nil
+       // result: (FMOVSstoreidx4 ptr idx val mem)
+       for {
+               off := auxIntToInt32(v.AuxInt)
+               sym := auxToSym(v.Aux)
+               if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
+                       break
+               }
+               idx := v_0.Args[1]
+               ptr := v_0.Args[0]
+               val := v_1
+               mem := v_2
+               if !(off == 0 && sym == nil) {
+                       break
+               }
+               v.reset(OpARM64FMOVSstoreidx4)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
        // match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
        // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
        // result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
@@ -4362,6 +4602,60 @@ func rewriteValueARM64_OpARM64FMOVSstoreidx(v *Value) bool {
                v.AddArg3(idx, val, mem)
                return true
        }
+       // match: (FMOVSstoreidx ptr (SLLconst [2] idx) val mem)
+       // result: (FMOVSstoreidx4 ptr idx val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
+                       break
+               }
+               idx := v_1.Args[0]
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64FMOVSstoreidx4)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       // match: (FMOVSstoreidx (SLLconst [2] idx) ptr val mem)
+       // result: (FMOVSstoreidx4 ptr idx val mem)
+       for {
+               if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
+                       break
+               }
+               idx := v_0.Args[0]
+               ptr := v_1
+               val := v_2
+               mem := v_3
+               v.reset(OpARM64FMOVSstoreidx4)
+               v.AddArg4(ptr, idx, val, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueARM64_OpARM64FMOVSstoreidx4(v *Value) bool {
+       v_3 := v.Args[3]
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (FMOVSstoreidx4 ptr (MOVDconst [c]) val mem)
+       // cond: is32Bit(c<<2)
+       // result: (FMOVSstore [int32(c)<<2] ptr val mem)
+       for {
+               ptr := v_0
+               if v_1.Op != OpARM64MOVDconst {
+                       break
+               }
+               c := auxIntToInt64(v_1.AuxInt)
+               val := v_2
+               mem := v_3
+               if !(is32Bit(c << 2)) {
+                       break
+               }
+               v.reset(OpARM64FMOVSstore)
+               v.AuxInt = int32ToAuxInt(int32(c) << 2)
+               v.AddArg3(ptr, val, mem)
+               return true
+       }
        return false
 }
 func rewriteValueARM64_OpARM64FMULD(v *Value) bool {
index 83b4a358a5b0610e39411072245e05a02a6ad18d..397cbb82f75b9320e46b6a77393d2039e2bfe414 100644 (file)
@@ -53,12 +53,12 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) {
 }
 
 func indexLoad(b0 []float32, b1 float32, idx int) float32 {
-       // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
+       // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
        return b0[idx] * b1
 }
 
 func indexStore(b0 []float64, b1 float64, idx int) {
-       // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
+       // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
        b0[idx] = b1
 }
 
index a2342831460281408c16b217d36c9ce85f2bafad..7f06a574fe613337688021aafa93cc0fb7d4c5e3 100644 (file)
@@ -177,9 +177,11 @@ func idxFloat32(x, y []float32, i int) {
        var t float32
        //    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
        // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+       //    arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+`
        t = x[i+1]
        //    amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
        // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+       //    arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)`
        y[i+1] = t
        //    amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
        // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
@@ -193,9 +195,11 @@ func idxFloat64(x, y []float64, i int) {
        var t float64
        //    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
        // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+       //    arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+`
        t = x[i+1]
        //    amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
        // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+       //    arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)`
        y[i+1] = t
        //    amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
        // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`