Optimize loads and stores to []float64 and []float32.
Previously the compiler emitted a separate LSL instruction to scale the
index register, instead of using the shifted-register indexed load/store
addressing mode.
Before:
LSL $3, R0, R0
FMOVD F0, (R1)(R0)
After:
FMOVD F0, (R1)(R0<<3)
Fixes #42798
Change-Id: I0c0912140c3dce5aa6abc27097c0eb93833cc589
Reviewed-on: https://go-review.googlesource.com/c/go/+/273706
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Giovanni Bajo <rasky@develer.com>
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
switch v.Op {
- case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
+ case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
+ ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
- case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
+ case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
+ ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
ssa.OpARM64MOVHUloadidx2,
ssa.OpARM64MOVWloadidx4,
ssa.OpARM64MOVWUloadidx4,
- ssa.OpARM64MOVDloadidx8:
+ ssa.OpARM64MOVDloadidx8,
+ ssa.OpARM64FMOVDloadidx8,
+ ssa.OpARM64FMOVSloadidx4:
p := s.Prog(v.Op.Asm())
p.From = genIndexedOperand(v)
p.To.Type = obj.TYPE_REG
ssa.OpARM64FMOVDstoreidx,
ssa.OpARM64MOVHstoreidx2,
ssa.OpARM64MOVWstoreidx4,
- ssa.OpARM64MOVDstoreidx8:
+ ssa.OpARM64FMOVSstoreidx4,
+ ssa.OpARM64MOVDstoreidx8,
+ ssa.OpARM64FMOVDstoreidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG
(MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem)
(MOVHloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload [int32(c)<<1] ptr mem)
+(FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem)
+(FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem)
+(FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem)
+(FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem)
+(FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem)
+(FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem)
+(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem)
+(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem)
+
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
(MOVBstore [off1+int32(off2)] {sym} ptr val mem)
(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem)
(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem)
+(FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem)
+(FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem)
+(FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem)
+(FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem)
+(FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem)
+(FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem)
+(FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem)
+(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem)
+
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
{name: "FMOVDloadidx", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1, arg2=mem.
// shifted register indexed load
- {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
- {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
- {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
- {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
- {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
+ {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
+ {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
+ {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
+ {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
+ {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
+ {name: "FMOVSloadidx4", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1*4, arg2 = mem.
+ {name: "FMOVDloadidx8", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1*8, arg2 = mem.
{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVDstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1, arg3=mem.
// shifted register indexed store
- {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
- {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
- {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
+ {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
+ {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
+ {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
+ {name: "FMOVSstoreidx4", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem.
+ {name: "FMOVDstoreidx8", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
OpARM64MOVWloadidx4
OpARM64MOVWUloadidx4
OpARM64MOVDloadidx8
+ OpARM64FMOVSloadidx4
+ OpARM64FMOVDloadidx8
OpARM64MOVBstore
OpARM64MOVHstore
OpARM64MOVWstore
OpARM64MOVHstoreidx2
OpARM64MOVWstoreidx4
OpARM64MOVDstoreidx8
+ OpARM64FMOVSstoreidx4
+ OpARM64FMOVDstoreidx8
OpARM64MOVBstorezero
OpARM64MOVHstorezero
OpARM64MOVWstorezero
},
},
},
+ {
+ name: "FMOVSloadidx4",
+ argLen: 3,
+ asm: arm64.AFMOVS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMOVDloadidx8",
+ argLen: 3,
+ asm: arm64.AFMOVD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ },
+ outputs: []outputInfo{
+ {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "MOVBstore",
auxType: auxSymOff,
},
},
},
+ {
+ name: "FMOVSstoreidx4",
+ argLen: 4,
+ asm: arm64.AFMOVS,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
+ {
+ name: "FMOVDstoreidx8",
+ argLen: 4,
+ asm: arm64.AFMOVD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+ {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
+ },
+ },
+ },
{
name: "MOVBstorezero",
auxType: auxSymOff,
return rewriteValueARM64_OpARM64FMOVDload(v)
case OpARM64FMOVDloadidx:
return rewriteValueARM64_OpARM64FMOVDloadidx(v)
+ case OpARM64FMOVDloadidx8:
+ return rewriteValueARM64_OpARM64FMOVDloadidx8(v)
case OpARM64FMOVDstore:
return rewriteValueARM64_OpARM64FMOVDstore(v)
case OpARM64FMOVDstoreidx:
return rewriteValueARM64_OpARM64FMOVDstoreidx(v)
+ case OpARM64FMOVDstoreidx8:
+ return rewriteValueARM64_OpARM64FMOVDstoreidx8(v)
case OpARM64FMOVSload:
return rewriteValueARM64_OpARM64FMOVSload(v)
case OpARM64FMOVSloadidx:
return rewriteValueARM64_OpARM64FMOVSloadidx(v)
+ case OpARM64FMOVSloadidx4:
+ return rewriteValueARM64_OpARM64FMOVSloadidx4(v)
case OpARM64FMOVSstore:
return rewriteValueARM64_OpARM64FMOVSstore(v)
case OpARM64FMOVSstoreidx:
return rewriteValueARM64_OpARM64FMOVSstoreidx(v)
+ case OpARM64FMOVSstoreidx4:
+ return rewriteValueARM64_OpARM64FMOVSstoreidx4(v)
case OpARM64FMULD:
return rewriteValueARM64_OpARM64FMULD(v)
case OpARM64FMULS:
v.AddArg3(ptr, idx, mem)
return true
}
+ // match: (FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem)
+ // cond: off == 0 && sym == nil
+ // result: (FMOVDloadidx8 ptr idx mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
+ break
+ }
+ idx := v_0.Args[1]
+ ptr := v_0.Args[0]
+ mem := v_1
+ if !(off == 0 && sym == nil) {
+ break
+ }
+ v.reset(OpARM64FMOVDloadidx8)
+ v.AddArg3(ptr, idx, mem)
+ return true
+ }
// match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
v.AddArg2(ptr, mem)
return true
}
+ // match: (FMOVDloadidx ptr (SLLconst [3] idx) mem)
+ // result: (FMOVDloadidx8 ptr idx mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+ break
+ }
+ idx := v_1.Args[0]
+ mem := v_2
+ v.reset(OpARM64FMOVDloadidx8)
+ v.AddArg3(ptr, idx, mem)
+ return true
+ }
+ // match: (FMOVDloadidx (SLLconst [3] idx) ptr mem)
+ // result: (FMOVDloadidx8 ptr idx mem)
+ for {
+ if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+ break
+ }
+ idx := v_0.Args[0]
+ ptr := v_1
+ mem := v_2
+ v.reset(OpARM64FMOVDloadidx8)
+ v.AddArg3(ptr, idx, mem)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpARM64FMOVDloadidx8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (FMOVDloadidx8 ptr (MOVDconst [c]) mem)
+ // cond: is32Bit(c<<3)
+ // result: (FMOVDload ptr [int32(c)<<3] mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64MOVDconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mem := v_2
+ if !(is32Bit(c << 3)) {
+ break
+ }
+ v.reset(OpARM64FMOVDload)
+ v.AuxInt = int32ToAuxInt(int32(c) << 3)
+ v.AddArg2(ptr, mem)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool {
v.AddArg4(ptr, idx, val, mem)
return true
}
+ // match: (FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem)
+ // cond: off == 0 && sym == nil
+ // result: (FMOVDstoreidx8 ptr idx val mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 {
+ break
+ }
+ idx := v_0.Args[1]
+ ptr := v_0.Args[0]
+ val := v_1
+ mem := v_2
+ if !(off == 0 && sym == nil) {
+ break
+ }
+ v.reset(OpARM64FMOVDstoreidx8)
+ v.AddArg4(ptr, idx, val, mem)
+ return true
+ }
// match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
v.AddArg3(idx, val, mem)
return true
}
+ // match: (FMOVDstoreidx ptr (SLLconst [3] idx) val mem)
+ // result: (FMOVDstoreidx8 ptr idx val mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 {
+ break
+ }
+ idx := v_1.Args[0]
+ val := v_2
+ mem := v_3
+ v.reset(OpARM64FMOVDstoreidx8)
+ v.AddArg4(ptr, idx, val, mem)
+ return true
+ }
+ // match: (FMOVDstoreidx (SLLconst [3] idx) ptr val mem)
+ // result: (FMOVDstoreidx8 ptr idx val mem)
+ for {
+ if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 {
+ break
+ }
+ idx := v_0.Args[0]
+ ptr := v_1
+ val := v_2
+ mem := v_3
+ v.reset(OpARM64FMOVDstoreidx8)
+ v.AddArg4(ptr, idx, val, mem)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (FMOVDstoreidx8 ptr (MOVDconst [c]) val mem)
+ // cond: is32Bit(c<<3)
+ // result: (FMOVDstore [int32(c)<<3] ptr val mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64MOVDconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ val := v_2
+ mem := v_3
+ if !(is32Bit(c << 3)) {
+ break
+ }
+ v.reset(OpARM64FMOVDstore)
+ v.AuxInt = int32ToAuxInt(int32(c) << 3)
+ v.AddArg3(ptr, val, mem)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool {
v.AddArg3(ptr, idx, mem)
return true
}
+ // match: (FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem)
+ // cond: off == 0 && sym == nil
+ // result: (FMOVSloadidx4 ptr idx mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
+ break
+ }
+ idx := v_0.Args[1]
+ ptr := v_0.Args[0]
+ mem := v_1
+ if !(off == 0 && sym == nil) {
+ break
+ }
+ v.reset(OpARM64FMOVSloadidx4)
+ v.AddArg3(ptr, idx, mem)
+ return true
+ }
// match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
v.AddArg2(ptr, mem)
return true
}
+ // match: (FMOVSloadidx ptr (SLLconst [2] idx) mem)
+ // result: (FMOVSloadidx4 ptr idx mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
+ break
+ }
+ idx := v_1.Args[0]
+ mem := v_2
+ v.reset(OpARM64FMOVSloadidx4)
+ v.AddArg3(ptr, idx, mem)
+ return true
+ }
+ // match: (FMOVSloadidx (SLLconst [2] idx) ptr mem)
+ // result: (FMOVSloadidx4 ptr idx mem)
+ for {
+ if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
+ break
+ }
+ idx := v_0.Args[0]
+ ptr := v_1
+ mem := v_2
+ v.reset(OpARM64FMOVSloadidx4)
+ v.AddArg3(ptr, idx, mem)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpARM64FMOVSloadidx4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (FMOVSloadidx4 ptr (MOVDconst [c]) mem)
+ // cond: is32Bit(c<<2)
+ // result: (FMOVSload ptr [int32(c)<<2] mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64MOVDconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ mem := v_2
+ if !(is32Bit(c << 2)) {
+ break
+ }
+ v.reset(OpARM64FMOVSload)
+ v.AuxInt = int32ToAuxInt(int32(c) << 2)
+ v.AddArg2(ptr, mem)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool {
v.AddArg4(ptr, idx, val, mem)
return true
}
+ // match: (FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem)
+ // cond: off == 0 && sym == nil
+ // result: (FMOVSstoreidx4 ptr idx val mem)
+ for {
+ off := auxIntToInt32(v.AuxInt)
+ sym := auxToSym(v.Aux)
+ if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 {
+ break
+ }
+ idx := v_0.Args[1]
+ ptr := v_0.Args[0]
+ val := v_1
+ mem := v_2
+ if !(off == 0 && sym == nil) {
+ break
+ }
+ v.reset(OpARM64FMOVSstoreidx4)
+ v.AddArg4(ptr, idx, val, mem)
+ return true
+ }
// match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
// cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared)
// result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
v.AddArg3(idx, val, mem)
return true
}
+ // match: (FMOVSstoreidx ptr (SLLconst [2] idx) val mem)
+ // result: (FMOVSstoreidx4 ptr idx val mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 {
+ break
+ }
+ idx := v_1.Args[0]
+ val := v_2
+ mem := v_3
+ v.reset(OpARM64FMOVSstoreidx4)
+ v.AddArg4(ptr, idx, val, mem)
+ return true
+ }
+ // match: (FMOVSstoreidx (SLLconst [2] idx) ptr val mem)
+ // result: (FMOVSstoreidx4 ptr idx val mem)
+ for {
+ if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 {
+ break
+ }
+ idx := v_0.Args[0]
+ ptr := v_1
+ val := v_2
+ mem := v_3
+ v.reset(OpARM64FMOVSstoreidx4)
+ v.AddArg4(ptr, idx, val, mem)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpARM64FMOVSstoreidx4(v *Value) bool {
+ v_3 := v.Args[3]
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (FMOVSstoreidx4 ptr (MOVDconst [c]) val mem)
+ // cond: is32Bit(c<<2)
+ // result: (FMOVSstore [int32(c)<<2] ptr val mem)
+ for {
+ ptr := v_0
+ if v_1.Op != OpARM64MOVDconst {
+ break
+ }
+ c := auxIntToInt64(v_1.AuxInt)
+ val := v_2
+ mem := v_3
+ if !(is32Bit(c << 2)) {
+ break
+ }
+ v.reset(OpARM64FMOVSstore)
+ v.AuxInt = int32ToAuxInt(int32(c) << 2)
+ v.AddArg3(ptr, val, mem)
+ return true
+ }
return false
}
func rewriteValueARM64_OpARM64FMULD(v *Value) bool {
}
func indexLoad(b0 []float32, b1 float32, idx int) float32 {
- // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+`
+ // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+`
return b0[idx] * b1
}
func indexStore(b0 []float64, b1 float64, idx int) {
- // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`
+ // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)`
b0[idx] = b1
}
var t float32
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+`
+ // arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+`
t = x[i+1]
// amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
// 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)`
+ // arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)`
y[i+1] = t
// amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
// 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+`
var t float64
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+`
+ // arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+`
t = x[i+1]
// amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
// 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)`
+ // arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)`
y[i+1] = t
// amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`
// 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`