From: Egon Elbre Date: Fri, 27 Nov 2020 15:10:33 +0000 (+0200) Subject: cmd/compile: ARM64 optimize []float64 and []float32 access X-Git-Tag: go1.17beta1~1383 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=3ee32439b5;p=gostls13.git cmd/compile: ARM64 optimize []float64 and []float32 access Optimize load and store to []float64 and []float32. Previously it used LSL instead of shifted register indexed load/store. Before: LSL $3, R0, R0 FMOVD F0, (R1)(R0) After: FMOVD F0, (R1)(R0<<3) Fixes #42798 Change-Id: I0c0912140c3dce5aa6abc27097c0eb93833cc589 Reviewed-on: https://go-review.googlesource.com/c/go/+/273706 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Go Bot Trust: Giovanni Bajo --- diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 73e74e1219..ca5eac72bf 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -100,9 +100,11 @@ func genIndexedOperand(v *ssa.Value) obj.Addr { // Reg: base register, Index: (shifted) index register mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} switch v.Op { - case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8: + case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8, + ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8: mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31 - case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4: + case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4, + ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4: mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31 case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2: mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31 @@ -435,7 +437,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, - ssa.OpARM64MOVDloadidx8: + ssa.OpARM64MOVDloadidx8, + ssa.OpARM64FMOVDloadidx8, + ssa.OpARM64FMOVSloadidx4: p := s.Prog(v.Op.Asm()) p.From = genIndexedOperand(v) p.To.Type = obj.TYPE_REG @@ -472,7 +476,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64FMOVDstoreidx, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVWstoreidx4, - ssa.OpARM64MOVDstoreidx8: + ssa.OpARM64FMOVSstoreidx4, + ssa.OpARM64MOVDstoreidx8, + ssa.OpARM64FMOVDstoreidx8: p := s.Prog(v.Op.Asm()) p.To = genIndexedOperand(v) p.From.Type = obj.TYPE_REG diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 4531c38a7a..98503748db 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -792,6 +792,15 @@ (MOVHUloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHUload [int32(c)<<1] ptr mem) (MOVHloadidx2 ptr (MOVDconst [c]) mem) && is32Bit(c<<1) => (MOVHload [int32(c)<<1] ptr mem) +(FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil => (FMOVDloadidx8 ptr idx mem) +(FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil => (FMOVSloadidx4 ptr idx mem) +(FMOVDloadidx ptr (SLLconst [3] idx) mem) => (FMOVDloadidx8 ptr idx mem) +(FMOVSloadidx ptr (SLLconst [2] idx) mem) => (FMOVSloadidx4 ptr idx mem) +(FMOVDloadidx (SLLconst [3] idx) ptr mem) => (FMOVDloadidx8 ptr idx mem) +(FMOVSloadidx (SLLconst [2] idx) ptr mem) => (FMOVSloadidx4 ptr idx mem) +(FMOVDloadidx8 ptr (MOVDconst [c]) mem) && is32Bit(c<<3) => (FMOVDload ptr [int32(c)<<3] mem) +(FMOVSloadidx4 ptr (MOVDconst [c]) mem) && is32Bit(c<<2) => (FMOVSload ptr [int32(c)<<2] mem) + (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => (MOVBstore [off1+int32(off2)] {sym} ptr val mem) @@ -865,6 +874,15 @@ (MOVWstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (MOVWstore [int32(c)<<2] ptr val mem) (MOVHstoreidx2 ptr (MOVDconst [c]) val mem) && is32Bit(c<<1) => (MOVHstore [int32(c)<<1] ptr val mem) +(FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil => (FMOVDstoreidx8 ptr idx val mem) +(FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil => (FMOVSstoreidx4 ptr idx val mem) +(FMOVDstoreidx ptr (SLLconst [3] idx) val mem) => (FMOVDstoreidx8 ptr idx val mem) +(FMOVSstoreidx ptr (SLLconst [2] idx) val mem) => (FMOVSstoreidx4 ptr idx val mem) +(FMOVDstoreidx (SLLconst [3] idx) ptr val mem) => (FMOVDstoreidx8 ptr idx val mem) +(FMOVSstoreidx (SLLconst [2] idx) ptr val mem) => (FMOVSstoreidx4 ptr idx val mem) +(FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) && is32Bit(c<<3) => (FMOVDstore [int32(c)<<3] ptr val mem) +(FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) && is32Bit(c<<2) => (FMOVSstore [int32(c)<<2] ptr val mem) + (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) => diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index b0bc9c78ff..e826e75252 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -379,11 +379,13 @@ func init() { {name: "FMOVDloadidx", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1, arg2=mem. // shifted register indexed load - {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem. - {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem. - {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem. - {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem. - {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem. + {name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH", typ: "Int16"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem. + {name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU", typ: "UInt16"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem. + {name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW", typ: "Int32"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem. + {name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU", typ: "UInt32"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem. + {name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD", typ: "UInt64"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem. + {name: "FMOVSloadidx4", argLength: 3, reg: fp2load, asm: "FMOVS", typ: "Float32"}, // load 32-bit float from arg0 + arg1*4, arg2 = mem. + {name: "FMOVDloadidx8", argLength: 3, reg: fp2load, asm: "FMOVD", typ: "Float64"}, // load 64-bit float from arg0 + arg1*8, arg2 = mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. @@ -402,9 +404,11 @@ func init() { {name: "FMOVDstoreidx", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1, arg3=mem. // shifted register indexed store - {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem. - {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem. - {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem. + {name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem. + {name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem. + {name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem. + {name: "FMOVSstoreidx4", argLength: 4, reg: fpstore2, asm: "FMOVS", typ: "Mem"}, // store 32-bit float of arg2 to arg0 + arg1*4, arg3=mem. + {name: "FMOVDstoreidx8", argLength: 4, reg: fpstore2, asm: "FMOVD", typ: "Mem"}, // store 64-bit float of arg2 to arg0 + arg1*8, arg3=mem. {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ba170968ae..551aa725b6 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1481,6 +1481,8 @@ const ( OpARM64MOVWloadidx4 OpARM64MOVWUloadidx4 OpARM64MOVDloadidx8 + OpARM64FMOVSloadidx4 + OpARM64FMOVDloadidx8 OpARM64MOVBstore OpARM64MOVHstore OpARM64MOVWstore @@ -1497,6 +1499,8 @@ const ( OpARM64MOVHstoreidx2 OpARM64MOVWstoreidx4 OpARM64MOVDstoreidx8 + OpARM64FMOVSstoreidx4 + OpARM64FMOVDstoreidx8 OpARM64MOVBstorezero OpARM64MOVHstorezero OpARM64MOVWstorezero @@ -19787,6 +19791,34 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FMOVSloadidx4", + argLen: 3, + asm: arm64.AFMOVS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { + name: "FMOVDloadidx8", + argLen: 3, + asm: arm64.AFMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "MOVBstore", auxType: auxSymOff, @@ -19994,6 +20026,30 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "FMOVSstoreidx4", + argLen: 4, + asm: arm64.AFMOVS, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, + { + name: "FMOVDstoreidx8", + argLen: 4, + asm: arm64.AFMOVD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + {2, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31 + }, + }, + }, { name: "MOVBstorezero", auxType: auxSymOff, diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index ba146c7043..ece834f996 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -99,18 +99,26 @@ func rewriteValueARM64(v *Value) bool { return rewriteValueARM64_OpARM64FMOVDload(v) case OpARM64FMOVDloadidx: return rewriteValueARM64_OpARM64FMOVDloadidx(v) + case OpARM64FMOVDloadidx8: + return rewriteValueARM64_OpARM64FMOVDloadidx8(v) case OpARM64FMOVDstore: return rewriteValueARM64_OpARM64FMOVDstore(v) case OpARM64FMOVDstoreidx: return rewriteValueARM64_OpARM64FMOVDstoreidx(v) + case OpARM64FMOVDstoreidx8: + return rewriteValueARM64_OpARM64FMOVDstoreidx8(v) case OpARM64FMOVSload: return rewriteValueARM64_OpARM64FMOVSload(v) case OpARM64FMOVSloadidx: return rewriteValueARM64_OpARM64FMOVSloadidx(v) + case OpARM64FMOVSloadidx4: + return rewriteValueARM64_OpARM64FMOVSloadidx4(v) case OpARM64FMOVSstore: return rewriteValueARM64_OpARM64FMOVSstore(v) case OpARM64FMOVSstoreidx: return rewriteValueARM64_OpARM64FMOVSstoreidx(v) + case OpARM64FMOVSstoreidx4: + return rewriteValueARM64_OpARM64FMOVSstoreidx4(v) case OpARM64FMULD: return rewriteValueARM64_OpARM64FMULD(v) case OpARM64FMULS: @@ -3900,6 +3908,25 @@ func rewriteValueARM64_OpARM64FMOVDload(v *Value) bool { v.AddArg3(ptr, idx, mem) return true } + // match: (FMOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (FMOVDloadidx8 ptr idx mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 { + break + } + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64FMOVDloadidx8) + v.AddArg3(ptr, idx, mem) + return true + } // match: (FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -3964,6 +3991,56 @@ func rewriteValueARM64_OpARM64FMOVDloadidx(v *Value) bool { v.AddArg2(ptr, mem) return true } + // match: (FMOVDloadidx ptr (SLLconst [3] idx) mem) + // result: (FMOVDloadidx8 ptr idx mem) + for { + ptr := v_0 + if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 { + break + } + idx := v_1.Args[0] + mem := v_2 + v.reset(OpARM64FMOVDloadidx8) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (FMOVDloadidx (SLLconst [3] idx) ptr mem) + // result: (FMOVDloadidx8 ptr idx mem) + for { + if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 { + break + } + idx := v_0.Args[0] + ptr := v_1 + mem := v_2 + v.reset(OpARM64FMOVDloadidx8) + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64FMOVDloadidx8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (FMOVDloadidx8 ptr (MOVDconst [c]) mem) + // cond: is32Bit(c<<3) + // result: (FMOVDload ptr [int32(c)<<3] mem) + for { + ptr := v_0 + if v_1.Op != OpARM64MOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + if !(is32Bit(c << 3)) { + break + } + v.reset(OpARM64FMOVDload) + v.AuxInt = int32ToAuxInt(int32(c) << 3) + v.AddArg2(ptr, mem) + return true + } return false } func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool { @@ -4031,6 +4108,26 @@ func rewriteValueARM64_OpARM64FMOVDstore(v *Value) bool { v.AddArg4(ptr, idx, val, mem) return true } + // match: (FMOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (FMOVDstoreidx8 ptr idx val mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 3 { + break + } + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64FMOVDstoreidx8) + v.AddArg4(ptr, idx, val, mem) + return true + } // match: (FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -4099,6 +4196,60 @@ func rewriteValueARM64_OpARM64FMOVDstoreidx(v *Value) bool { v.AddArg3(idx, val, mem) return true } + // match: (FMOVDstoreidx ptr (SLLconst [3] idx) val mem) + // result: (FMOVDstoreidx8 ptr idx val mem) + for { + ptr := v_0 + if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 3 { + break + } + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(OpARM64FMOVDstoreidx8) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (FMOVDstoreidx (SLLconst [3] idx) ptr val mem) + // result: (FMOVDstoreidx8 ptr idx val mem) + for { + if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 3 { + break + } + idx := v_0.Args[0] + ptr := v_1 + val := v_2 + mem := v_3 + v.reset(OpARM64FMOVDstoreidx8) + v.AddArg4(ptr, idx, val, mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64FMOVDstoreidx8(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (FMOVDstoreidx8 ptr (MOVDconst [c]) val mem) + // cond: is32Bit(c<<3) + // result: (FMOVDstore [int32(c)<<3] ptr val mem) + for { + ptr := v_0 + if v_1.Op != OpARM64MOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + val := v_2 + mem := v_3 + if !(is32Bit(c << 3)) { + break + } + v.reset(OpARM64FMOVDstore) + v.AuxInt = int32ToAuxInt(int32(c) << 3) + v.AddArg3(ptr, val, mem) + return true + } return false } func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool { @@ -4163,6 +4314,25 @@ func rewriteValueARM64_OpARM64FMOVSload(v *Value) bool { v.AddArg3(ptr, idx, mem) return true } + // match: (FMOVSload [off] {sym} (ADDshiftLL [2] ptr idx) mem) + // cond: off == 0 && sym == nil + // result: (FMOVSloadidx4 ptr idx mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 { + break + } + idx := v_0.Args[1] + ptr := v_0.Args[0] + mem := v_1 + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64FMOVSloadidx4) + v.AddArg3(ptr, idx, mem) + return true + } // match: (FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem) @@ -4227,6 +4397,56 @@ func rewriteValueARM64_OpARM64FMOVSloadidx(v *Value) bool { v.AddArg2(ptr, mem) return true } + // match: (FMOVSloadidx ptr (SLLconst [2] idx) mem) + // result: (FMOVSloadidx4 ptr idx mem) + for { + ptr := v_0 + if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 { + break + } + idx := v_1.Args[0] + mem := v_2 + v.reset(OpARM64FMOVSloadidx4) + v.AddArg3(ptr, idx, mem) + return true + } + // match: (FMOVSloadidx (SLLconst [2] idx) ptr mem) + // result: (FMOVSloadidx4 ptr idx mem) + for { + if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 { + break + } + idx := v_0.Args[0] + ptr := v_1 + mem := v_2 + v.reset(OpARM64FMOVSloadidx4) + v.AddArg3(ptr, idx, mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64FMOVSloadidx4(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (FMOVSloadidx4 ptr (MOVDconst [c]) mem) + // cond: is32Bit(c<<2) + // result: (FMOVSload ptr [int32(c)<<2] mem) + for { + ptr := v_0 + if v_1.Op != OpARM64MOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mem := v_2 + if !(is32Bit(c << 2)) { + break + } + v.reset(OpARM64FMOVSload) + v.AuxInt = int32ToAuxInt(int32(c) << 2) + v.AddArg2(ptr, mem) + return true + } return false } func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool { @@ -4294,6 +4514,26 @@ func rewriteValueARM64_OpARM64FMOVSstore(v *Value) bool { v.AddArg4(ptr, idx, val, mem) return true } + // match: (FMOVSstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) + // cond: off == 0 && sym == nil + // result: (FMOVSstoreidx4 ptr idx val mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + if v_0.Op != OpARM64ADDshiftLL || auxIntToInt64(v_0.AuxInt) != 2 { + break + } + idx := v_0.Args[1] + ptr := v_0.Args[0] + val := v_1 + mem := v_2 + if !(off == 0 && sym == nil) { + break + } + v.reset(OpARM64FMOVSstoreidx4) + v.AddArg4(ptr, idx, val, mem) + return true + } // match: (FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem) // cond: canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) && (ptr.Op != OpSB || !config.ctxt.Flag_shared) // result: (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem) @@ -4362,6 +4602,60 @@ func rewriteValueARM64_OpARM64FMOVSstoreidx(v *Value) bool { v.AddArg3(idx, val, mem) return true } + // match: (FMOVSstoreidx ptr (SLLconst [2] idx) val mem) + // result: (FMOVSstoreidx4 ptr idx val mem) + for { + ptr := v_0 + if v_1.Op != OpARM64SLLconst || auxIntToInt64(v_1.AuxInt) != 2 { + break + } + idx := v_1.Args[0] + val := v_2 + mem := v_3 + v.reset(OpARM64FMOVSstoreidx4) + v.AddArg4(ptr, idx, val, mem) + return true + } + // match: (FMOVSstoreidx (SLLconst [2] idx) ptr val mem) + // result: (FMOVSstoreidx4 ptr idx val mem) + for { + if v_0.Op != OpARM64SLLconst || auxIntToInt64(v_0.AuxInt) != 2 { + break + } + idx := v_0.Args[0] + ptr := v_1 + val := v_2 + mem := v_3 + v.reset(OpARM64FMOVSstoreidx4) + v.AddArg4(ptr, idx, val, mem) + return true + } + return false +} +func rewriteValueARM64_OpARM64FMOVSstoreidx4(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (FMOVSstoreidx4 ptr (MOVDconst [c]) val mem) + // cond: is32Bit(c<<2) + // result: (FMOVSstore [int32(c)<<2] ptr val mem) + for { + ptr := v_0 + if v_1.Op != OpARM64MOVDconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + val := v_2 + mem := v_3 + if !(is32Bit(c << 2)) { + break + } + v.reset(OpARM64FMOVSstore) + v.AuxInt = int32ToAuxInt(int32(c) << 2) + v.AddArg3(ptr, val, mem) + return true + } return false } func rewriteValueARM64_OpARM64FMULD(v *Value) bool { diff --git a/test/codegen/floats.go b/test/codegen/floats.go index 83b4a358a5..397cbb82f7 100644 --- a/test/codegen/floats.go +++ b/test/codegen/floats.go @@ -53,12 +53,12 @@ func DivPow2(f1, f2, f3 float64) (float64, float64, float64) { } func indexLoad(b0 []float32, b1 float32, idx int) float32 { - // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+\),\sF[0-9]+` + // arm64:`FMOVS\s\(R[0-9]+\)\(R[0-9]+<<2\),\sF[0-9]+` return b0[idx] * b1 } func indexStore(b0 []float64, b1 float64, idx int) { - // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)` + // arm64:`FMOVD\sF[0-9]+,\s\(R[0-9]+\)\(R[0-9]+<<3\)` b0[idx] = b1 } diff --git a/test/codegen/memops.go b/test/codegen/memops.go index a234283146..7f06a574fe 100644 --- a/test/codegen/memops.go +++ b/test/codegen/memops.go @@ -177,9 +177,11 @@ func idxFloat32(x, y []float32, i int) { var t float32 // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), X[0-9]+` + // arm64: `FMOVS\t\(R[0-9]*\)\(R[0-9]*<<2\), F[0-9]+` t = x[i+1] // amd64: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` // 386/sse2: `MOVSS\tX[0-9]+, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\)` + // arm64: `FMOVS\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<2\)` y[i+1] = t // amd64: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` // 386/sse2: `MOVSS\t4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[14]\), X[0-9]+` @@ -193,9 +195,11 @@ func idxFloat64(x, y []float64, i int) { var t float64 // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), X[0-9]+` + // arm64: `FMOVD\t\(R[0-9]*\)\(R[0-9]*<<3\), F[0-9]+` t = x[i+1] // amd64: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` // 386/sse2: `MOVSD\tX[0-9]+, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\)` + // arm64: `FMOVD\tF[0-9]+, \(R[0-9]*\)\(R[0-9]*<<3\)` y[i+1] = t // amd64: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+` // 386/sse2: `MOVSD\t8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*[18]\), X[0-9]+`