From 69a755b6020e20b7c424628e9c1ba4e28d311373 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Mon, 8 Aug 2016 11:26:25 -0700 Subject: [PATCH] [dev.ssa] cmd/compile: port SSA backend to amd64p32 It's not a new backend, just a PtrSize==4 modification of the existing AMD64 backend. Change-Id: Icc63521a5cf4ebb379f7430ef3f070894c09afda Reviewed-on: https://go-review.googlesource.com/25586 Run-TryBot: Keith Randall TryBot-Result: Gobot Gobot Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/ssa.go | 6 +- src/cmd/compile/internal/gc/ssa.go | 8 +- src/cmd/compile/internal/ssa/config.go | 11 + src/cmd/compile/internal/ssa/decompose.go | 4 + src/cmd/compile/internal/ssa/gen/AMD64.rules | 97 +- src/cmd/compile/internal/ssa/gen/AMD64Ops.go | 13 +- src/cmd/compile/internal/ssa/opGen.go | 31 + src/cmd/compile/internal/ssa/opt.go | 2 +- src/cmd/compile/internal/ssa/regalloc.go | 10 +- src/cmd/compile/internal/ssa/rewriteAMD64.go | 875 ++++++++++++++++++- test/live.go | 2 +- test/live_ssa.go | 2 +- test/nilptr3.go | 2 +- test/nilptr3_ssa.go | 2 +- test/sliceopt.go | 2 +- 15 files changed, 999 insertions(+), 68 deletions(-) diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index a95fa245d2..688025753c 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -498,8 +498,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { gc.AddAux(&p.From, v) p.To.Type = obj.TYPE_REG p.To.Reg = gc.SSARegNum(v) - case ssa.OpAMD64LEAQ: - p := gc.Prog(x86.ALEAQ) + case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL: + p := gc.Prog(v.Op.Asm()) p.From.Type = obj.TYPE_MEM p.From.Reg = gc.SSARegNum(v.Args[0]) gc.AddAux(&p.From, v) @@ -703,7 +703,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg)) p.To.Offset = v.AuxInt - case ssa.OpCopy, ssa.OpAMD64MOVQconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? + case ssa.OpCopy, ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert: // TODO: use MOVQreg for reg->reg copies instead of OpCopy? if v.Type.IsMemory() { return } diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 43dbcf54ca..63f9203895 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -37,7 +37,7 @@ func shouldssa(fn *Node) bool { if os.Getenv("SSATEST") == "" { return false } - case "amd64", "arm": + case "amd64", "amd64p32", "arm": // Generally available. } if !ssaEnabled { @@ -1657,7 +1657,7 @@ func (s *state) expr(n *Node) *ssa.Value { if ft.IsFloat() || tt.IsFloat() { conv, ok := fpConvOpToSSA[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}] - if s.config.IntSize == 4 { + if s.config.IntSize == 4 && Thearch.LinkArch.Name != "amd64p32" { if conv1, ok1 := fpConvOpToSSA32[twoTypes{s.concreteEtype(ft), s.concreteEtype(tt)}]; ok1 { conv = conv1 } @@ -2998,6 +2998,10 @@ func (s *state) rtcall(fn *Node, returns bool, results []*Type, args ...*ssa.Val off += size } off = Rnd(off, int64(Widthptr)) + if Thearch.LinkArch.Name == "amd64p32" { + // amd64p32 wants 8-byte alignment of the start of the return values. + off = Rnd(off, 8) + } // Issue call call := s.newValue1A(ssa.OpStaticCall, ssa.TypeMem, fn.Sym, s.mem()) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 1bfee3882f..a2daac09ce 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -138,6 +138,17 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config c.fpRegMask = fpRegMaskAMD64 c.FPReg = framepointerRegAMD64 c.hasGReg = false + case "amd64p32": + c.IntSize = 4 + c.PtrSize = 4 + c.lowerBlock = rewriteBlockAMD64 + c.lowerValue = rewriteValueAMD64 + c.registers = registersAMD64[:] + c.gpRegMask = gpRegMaskAMD64 + c.fpRegMask = fpRegMaskAMD64 + c.FPReg = framepointerRegAMD64 + c.hasGReg = false + c.noDuffDevice = true case "386": c.IntSize = 4 c.PtrSize = 4 diff --git a/src/cmd/compile/internal/ssa/decompose.go b/src/cmd/compile/internal/ssa/decompose.go index 08cc1b9dee..2f637e45b7 100644 --- a/src/cmd/compile/internal/ssa/decompose.go +++ b/src/cmd/compile/internal/ssa/decompose.go @@ -108,6 +108,10 @@ func decomposeBuiltIn(f *Func) { func decomposeBuiltInPhi(v *Value) { switch { case v.Type.IsInteger() && v.Type.Size() == 8 && v.Block.Func.Config.IntSize == 4: + if v.Block.Func.Config.arch == "amd64p32" { + // Even though ints are 32 bits, we have 64-bit ops. + break + } decomposeInt64Phi(v) case v.Type.IsComplex(): decomposeComplexPhi(v) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules index 9b418661f0..aa81ca7aa8 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64.rules +++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules @@ -4,7 +4,8 @@ // Lowering arithmetic (Add64 x y) -> (ADDQ x y) -(AddPtr x y) -> (ADDQ x y) +(AddPtr x y) && config.PtrSize == 8 -> (ADDQ x y) +(AddPtr x y) && config.PtrSize == 4 -> (ADDL x y) (Add32 x y) -> (ADDL x y) (Add16 x y) -> (ADDL x y) (Add8 x y) -> (ADDL x y) @@ -12,7 +13,8 @@ (Add64F x y) -> (ADDSD x y) (Sub64 x y) -> (SUBQ x y) -(SubPtr x y) -> (SUBQ x y) +(SubPtr x y) && config.PtrSize == 8 -> (SUBQ x y) +(SubPtr x y) && config.PtrSize == 4 -> (SUBL x y) (Sub32 x y) -> (SUBL x y) (Sub16 x y) -> (SUBL x y) (Sub8 x y) -> (SUBL x y) @@ -91,8 +93,9 @@ (Not x) -> (XORLconst [1] x) // Lowering pointer arithmetic -(OffPtr [off] ptr) && is32Bit(off) -> (ADDQconst [off] ptr) -(OffPtr [off] ptr) -> (ADDQ (MOVQconst [off]) ptr) +(OffPtr [off] ptr) && config.PtrSize == 8 && is32Bit(off) -> (ADDQconst [off] ptr) +(OffPtr [off] ptr) && config.PtrSize == 8 -> (ADDQ (MOVQconst [off]) ptr) +(OffPtr [off] ptr) && config.PtrSize == 4 -> (ADDLconst [off] ptr) // Lowering other arithmetic // TODO: CMPQconst 0 below is redundant because BSF sets Z but how to remove? @@ -270,7 +273,8 @@ (Eq16 x y) -> (SETEQ (CMPW x y)) (Eq8 x y) -> (SETEQ (CMPB x y)) (EqB x y) -> (SETEQ (CMPB x y)) -(EqPtr x y) -> (SETEQ (CMPQ x y)) +(EqPtr x y) && config.PtrSize == 8 -> (SETEQ (CMPQ x y)) +(EqPtr x y) && config.PtrSize == 4 -> (SETEQ (CMPL x y)) (Eq64F x y) -> (SETEQF (UCOMISD x y)) (Eq32F x y) -> (SETEQF (UCOMISS x y)) @@ -279,13 +283,16 @@ (Neq16 x y) -> (SETNE (CMPW x y)) (Neq8 x y) -> (SETNE (CMPB x y)) (NeqB x y) -> (SETNE (CMPB x y)) -(NeqPtr x y) -> (SETNE (CMPQ x y)) +(NeqPtr x y) && config.PtrSize == 8 -> (SETNE (CMPQ x y)) +(NeqPtr x y) && config.PtrSize == 4 -> (SETNE (CMPL x y)) (Neq64F x y) -> (SETNEF (UCOMISD x y)) (Neq32F x y) -> (SETNEF (UCOMISS x y)) +(Int64Hi x) -> (SHRQconst [32] x) // needed for amd64p32 + // Lowering loads -(Load ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVQload ptr mem) -(Load ptr mem) && is32BitInt(t) -> (MOVLload ptr mem) +(Load ptr mem) && (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) -> (MOVQload ptr mem) +(Load ptr mem) && (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) -> (MOVLload ptr mem) (Load ptr mem) && is16BitInt(t) -> (MOVWload ptr mem) (Load ptr mem) && (t.IsBoolean() || is8BitInt(t)) -> (MOVBload ptr mem) (Load ptr mem) && is32BitFloat(t) -> (MOVSSload ptr mem) @@ -328,14 +335,14 @@ (Move [s] dst src mem) && SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 <= 8 -> (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16] - (ADDQconst dst [SizeAndAlign(s).Size()%16]) - (ADDQconst src [SizeAndAlign(s).Size()%16]) + (OffPtr dst [SizeAndAlign(s).Size()%16]) + (OffPtr src [SizeAndAlign(s).Size()%16]) (MOVQstore dst (MOVQload src mem) mem)) (Move [s] dst src mem) && SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 > 8 -> (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16] - (ADDQconst dst [SizeAndAlign(s).Size()%16]) - (ADDQconst src [SizeAndAlign(s).Size()%16]) + (OffPtr dst [SizeAndAlign(s).Size()%16]) + (OffPtr src [SizeAndAlign(s).Size()%16]) (MOVOstore dst (MOVOload src mem) mem)) // Medium copying uses a duff device. @@ -376,7 +383,7 @@ // Strip off any fractional word zeroing. (Zero [s] destptr mem) && SizeAndAlign(s).Size()%8 != 0 && SizeAndAlign(s).Size() > 8 -> - (Zero [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8] (ADDQconst destptr [SizeAndAlign(s).Size()%8]) + (Zero [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8] (OffPtr destptr [SizeAndAlign(s).Size()%8]) (MOVQstoreconst [0] destptr mem)) // Zero small numbers of words directly. @@ -397,7 +404,7 @@ (Zero [s] destptr mem) && SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size()%16 != 0 && !config.noDuffDevice -> - (Zero [SizeAndAlign(s).Size()-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem)) + (Zero [SizeAndAlign(s).Size()-8] (OffPtr [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem)) (Zero [s] destptr mem) && SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%16 == 0 && !config.noDuffDevice -> (DUFFZERO [SizeAndAlign(s).Size()] destptr (MOVOconst [0]) mem) @@ -415,7 +422,8 @@ (Const64 [val]) -> (MOVQconst [val]) (Const32F [val]) -> (MOVSSconst [val]) (Const64F [val]) -> (MOVSDconst [val]) -(ConstNil) -> (MOVQconst [0]) +(ConstNil) && config.PtrSize == 8 -> (MOVQconst [0]) +(ConstNil) && config.PtrSize == 4 -> (MOVLconst [0]) (ConstBool [b]) -> (MOVLconst [b]) // Lowering calls @@ -426,14 +434,17 @@ (InterCall [argwid] entry mem) -> (CALLinter [argwid] entry mem) // Miscellaneous -(Convert x mem) -> (MOVQconvert x mem) -(IsNonNil p) -> (SETNE (TESTQ p p)) +(Convert x mem) && config.PtrSize == 8 -> (MOVQconvert x mem) +(Convert x mem) && config.PtrSize == 4 -> (MOVLconvert x mem) +(IsNonNil p) && config.PtrSize == 8 -> (SETNE (TESTQ p p)) +(IsNonNil p) && config.PtrSize == 4 -> (SETNE (TESTL p p)) (IsInBounds idx len) -> (SETB (CMPQ idx len)) (IsSliceInBounds idx len) -> (SETBE (CMPQ idx len)) (NilCheck ptr mem) -> (LoweredNilCheck ptr mem) (GetG mem) -> (LoweredGetG mem) (GetClosurePtr) -> (LoweredGetClosurePtr) -(Addr {sym} base) -> (LEAQ {sym} base) +(Addr {sym} base) && config.PtrSize == 8 -> (LEAQ {sym} base) +(Addr {sym} base) && config.PtrSize == 4 -> (LEAL {sym} base) // block rewrites (If (SETL cmp) yes no) -> (LT cmp yes no) @@ -1592,3 +1603,53 @@ && x.Uses == 1 && clobber(x) -> (MOVQstoreidx1 [i-4] {s} p (SHLQconst [2] idx) w0 mem) + +// amd64p32 rules +// same as the rules above, but with 32 instead of 64 bit pointer arithmetic. +// LEAQ,ADDQ -> LEAL,ADDL +(ADDLconst [c] (LEAL [d] {s} x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x) +(LEAL [c] {s} (ADDLconst [d] x)) && is32Bit(c+d) -> (LEAL [c+d] {s} x) + +(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) -> + (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) +(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) -> + (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem) +(MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) -> + (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) +(MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) -> + (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) + +(MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) -> + (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) +(MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) -> + (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) +(MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) -> + (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) +(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) -> + (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + +(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) +(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) -> + (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + +(MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVQload [off1+off2] {sym} ptr mem) +(MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVLload [off1+off2] {sym} ptr mem) +(MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVWload [off1+off2] {sym} ptr mem) +(MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(off1+off2) -> (MOVBload [off1+off2] {sym} ptr mem) +(MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVQstore [off1+off2] {sym} ptr val mem) +(MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVLstore [off1+off2] {sym} ptr val mem) +(MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} ptr val mem) +(MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) && is32Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} ptr val mem) +(MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) +(MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) +(MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) +(MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) && ValAndOff(sc).canAdd(off) -> + (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go index 7319fffa8e..07301618f4 100644 --- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go @@ -354,13 +354,15 @@ func init() { {name: "PXOR", argLength: 2, reg: fp21, asm: "PXOR", commutative: true, resultInArg0: true}, // exclusive or, applied to X regs for float negation. - {name: "LEAQ", argLength: 1, reg: gp11sb, aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux - {name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + arg1 + auxint + aux - {name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 2*arg1 + auxint + aux - {name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 4*arg1 + auxint + aux - {name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 8*arg1 + auxint + aux + {name: "LEAQ", argLength: 1, reg: gp11sb, asm: "LEAQ", aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux + {name: "LEAQ1", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + arg1 + auxint + aux + {name: "LEAQ2", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 2*arg1 + auxint + aux + {name: "LEAQ4", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 4*arg1 + auxint + aux + {name: "LEAQ8", argLength: 2, reg: gp21sb, aux: "SymOff"}, // arg0 + 8*arg1 + auxint + aux // Note: LEAQ{1,2,4,8} must not have OpSB as either argument. + {name: "LEAL", argLength: 1, reg: gp11sb, asm: "LEAL", aux: "SymOff", rematerializeable: true}, // arg0 + auxint + offset encoded in aux + // auxint+aux == add auxint and the offset of the symbol in aux (if any) to the effective address {name: "MOVBload", argLength: 2, reg: gpload, asm: "MOVBLZX", aux: "SymOff", typ: "UInt8"}, // load byte from arg0+auxint+aux. arg1=mem. Zero extend. {name: "MOVBQSXload", argLength: 2, reg: gpload, asm: "MOVBQSX", aux: "SymOff"}, // ditto, sign extend to int64 @@ -499,6 +501,7 @@ func init() { // gets correctly ordered with respect to GC safepoints. // arg0=ptr/int arg1=mem, output=int/ptr {name: "MOVQconvert", argLength: 2, reg: gp11, asm: "MOVQ"}, + {name: "MOVLconvert", argLength: 2, reg: gp11, asm: "MOVL"}, // amd64p32 equivalent // Constant flag values. For any comparison, there are 5 possible // outcomes: the three from the signed total order (<,==,>) and the diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 68b60f2180..7968b1de2f 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -510,6 +510,7 @@ const ( OpAMD64LEAQ2 OpAMD64LEAQ4 OpAMD64LEAQ8 + OpAMD64LEAL OpAMD64MOVBload OpAMD64MOVBQSXload OpAMD64MOVWload @@ -563,6 +564,7 @@ const ( OpAMD64LoweredGetClosurePtr OpAMD64LoweredNilCheck OpAMD64MOVQconvert + OpAMD64MOVLconvert OpAMD64FlagEQ OpAMD64FlagLT_ULT OpAMD64FlagLT_UGT @@ -5926,6 +5928,7 @@ var opcodeTable = [...]opInfo{ auxType: auxSymOff, argLen: 1, rematerializeable: true, + asm: x86.ALEAQ, reg: regInfo{ inputs: []inputInfo{ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB @@ -5991,6 +5994,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "LEAL", + auxType: auxSymOff, + argLen: 1, + rematerializeable: true, + asm: x86.ALEAL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB + }, + outputs: []outputInfo{ + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, { name: "MOVBload", auxType: auxSymOff, @@ -6646,6 +6664,19 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MOVLconvert", + argLen: 2, + asm: x86.AMOVL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + outputs: []outputInfo{ + {0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 + }, + }, + }, { name: "FlagEQ", argLen: 0, diff --git a/src/cmd/compile/internal/ssa/opt.go b/src/cmd/compile/internal/ssa/opt.go index f6a904abca..f211488cd7 100644 --- a/src/cmd/compile/internal/ssa/opt.go +++ b/src/cmd/compile/internal/ssa/opt.go @@ -11,7 +11,7 @@ func opt(f *Func) { func dec(f *Func) { applyRewrite(f, rewriteBlockdec, rewriteValuedec) - if f.Config.IntSize == 4 { + if f.Config.IntSize == 4 && f.Config.arch != "amd64p32" { applyRewrite(f, rewriteBlockdec64, rewriteValuedec64) } } diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index 493a370972..708569d8e3 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -492,8 +492,14 @@ func (s *regAllocState) init(f *Func) { s.f.Config.fe.Unimplementedf(0, "arch %s not implemented", s.f.Config.arch) } } - if s.f.Config.nacl && s.f.Config.arch == "arm" { - s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm + if s.f.Config.nacl { + switch s.f.Config.arch { + case "arm": + s.allocatable &^= 1 << 9 // R9 is "thread pointer" on nacl/arm + case "amd64p32": + s.allocatable &^= 1 << 5 // BP - reserved for nacl + s.allocatable &^= 1 << 15 // R15 - reserved for nacl + } } s.regs = make([]regState, s.numRegs) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 3aa38f31ad..d09e706fdd 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -46,6 +46,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpAMD64CMPW(v, config) case OpAMD64CMPWconst: return rewriteValueAMD64_OpAMD64CMPWconst(v, config) + case OpAMD64LEAL: + return rewriteValueAMD64_OpAMD64LEAL(v, config) case OpAMD64LEAQ: return rewriteValueAMD64_OpAMD64LEAQ(v, config) case OpAMD64LEAQ1: @@ -454,6 +456,8 @@ func rewriteValueAMD64(v *Value, config *Config) bool { return rewriteValueAMD64_OpHmul8(v, config) case OpHmul8u: return rewriteValueAMD64_OpHmul8u(v, config) + case OpInt64Hi: + return rewriteValueAMD64_OpInt64Hi(v, config) case OpInterCall: return rewriteValueAMD64_OpInterCall(v, config) case OpIsInBounds: @@ -848,6 +852,27 @@ func rewriteValueAMD64_OpAMD64ADDLconst(v *Value, config *Config) bool { v.AddArg(x) return true } + // match: (ADDLconst [c] (LEAL [d] {s} x)) + // cond: is32Bit(c+d) + // result: (LEAL [c+d] {s} x) + for { + c := v.AuxInt + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + d := v_0.AuxInt + s := v_0.Aux + x := v_0.Args[0] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + return true + } return false } func rewriteValueAMD64_OpAMD64ADDQ(v *Value, config *Config) bool { @@ -2635,6 +2660,32 @@ func rewriteValueAMD64_OpAMD64CMPWconst(v *Value, config *Config) bool { } return false } +func rewriteValueAMD64_OpAMD64LEAL(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (LEAL [c] {s} (ADDLconst [d] x)) + // cond: is32Bit(c+d) + // result: (LEAL [c+d] {s} x) + for { + c := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + d := v_0.AuxInt + x := v_0.Args[0] + if !(is32Bit(c + d)) { + break + } + v.reset(OpAMD64LEAL) + v.AuxInt = c + d + v.Aux = s + v.AddArg(x) + return true + } + return false +} func rewriteValueAMD64_OpAMD64LEAQ(v *Value, config *Config) bool { b := v.Block _ = b @@ -3595,6 +3646,53 @@ func rewriteValueAMD64_OpAMD64MOVBload(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVBload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (MOVBload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVBload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVBload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVBloadidx1(v *Value, config *Config) bool { @@ -3910,6 +4008,57 @@ func rewriteValueAMD64_OpAMD64MOVBstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVBstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVBstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVBstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVBstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value, config *Config) bool { @@ -4038,6 +4187,53 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + for { + sc := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVBstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVBstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) + for { + sc := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVBstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVBstoreconstidx1(v *Value, config *Config) bool { @@ -4602,6 +4798,53 @@ func rewriteValueAMD64_OpAMD64MOVLload(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVLload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVLload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVLload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLloadidx1(v *Value, config *Config) bool { @@ -5018,6 +5261,57 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVLstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVLstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVLstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value, config *Config) bool { @@ -5175,6 +5469,53 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + for { + sc := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVLstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVLstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) + for { + sc := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVLstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVLstoreconstidx1(v *Value, config *Config) bool { @@ -5973,6 +6314,53 @@ func rewriteValueAMD64_OpAMD64MOVQload(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVQload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVQload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVQload) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVQloadidx1(v *Value, config *Config) bool { @@ -6256,6 +6644,57 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVQstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVQstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVQstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value, config *Config) bool { @@ -6381,6 +6820,53 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + for { + sc := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVQstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVQstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) + for { + sc := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVQstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVQstoreconstidx1(v *Value, config *Config) bool { @@ -7889,16 +8375,63 @@ func rewriteValueAMD64_OpAMD64MOVWload(v *Value, config *Config) bool { break } ptr := v_0.Args[0] - idx := v_0.Args[1] + idx := v_0.Args[1] + mem := v.Args[1] + if !(ptr.Op != OpSB) { + break + } + v.reset(OpAMD64MOVWloadidx1) + v.AuxInt = off + v.Aux = sym + v.AddArg(ptr) + v.AddArg(idx) + v.AddArg(mem) + return true + } + // match: (MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVWload) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(mem) + return true + } + // match: (MOVWload [off1] {sym} (ADDLconst [off2] ptr) mem) + // cond: is32Bit(off1+off2) + // result: (MOVWload [off1+off2] {sym} ptr mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] mem := v.Args[1] - if !(ptr.Op != OpSB) { + if !(is32Bit(off1 + off2)) { break } - v.reset(OpAMD64MOVWloadidx1) - v.AuxInt = off + v.reset(OpAMD64MOVWload) + v.AuxInt = off1 + off2 v.Aux = sym v.AddArg(ptr) - v.AddArg(idx) v.AddArg(mem) return true } @@ -8318,6 +8851,57 @@ func rewriteValueAMD64_OpAMD64MOVWstore(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) + // cond: canMergeSym(sym1, sym2) + // result: (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem) + for { + off1 := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off2 := v_0.AuxInt + sym2 := v_0.Aux + base := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(canMergeSym(sym1, sym2)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = mergeSym(sym1, sym2) + v.AddArg(base) + v.AddArg(val) + v.AddArg(mem) + return true + } + // match: (MOVWstore [off1] {sym} (ADDLconst [off2] ptr) val mem) + // cond: is32Bit(off1+off2) + // result: (MOVWstore [off1+off2] {sym} ptr val mem) + for { + off1 := v.AuxInt + sym := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off2 := v_0.AuxInt + ptr := v_0.Args[0] + val := v.Args[1] + mem := v.Args[2] + if !(is32Bit(off1 + off2)) { + break + } + v.reset(OpAMD64MOVWstore) + v.AuxInt = off1 + off2 + v.Aux = sym + v.AddArg(ptr) + v.AddArg(val) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value, config *Config) bool { @@ -8472,6 +9056,53 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value, config *Config) bool { v.AddArg(mem) return true } + // match: (MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) + // cond: canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem) + for { + sc := v.AuxInt + sym1 := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64LEAL { + break + } + off := v_0.AuxInt + sym2 := v_0.Aux + ptr := v_0.Args[0] + mem := v.Args[1] + if !(canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = mergeSym(sym1, sym2) + v.AddArg(ptr) + v.AddArg(mem) + return true + } + // match: (MOVWstoreconst [sc] {s} (ADDLconst [off] ptr) mem) + // cond: ValAndOff(sc).canAdd(off) + // result: (MOVWstoreconst [ValAndOff(sc).add(off)] {s} ptr mem) + for { + sc := v.AuxInt + s := v.Aux + v_0 := v.Args[0] + if v_0.Op != OpAMD64ADDLconst { + break + } + off := v_0.AuxInt + ptr := v_0.Args[0] + mem := v.Args[1] + if !(ValAndOff(sc).canAdd(off)) { + break + } + v.reset(OpAMD64MOVWstoreconst) + v.AuxInt = ValAndOff(sc).add(off) + v.Aux = s + v.AddArg(ptr) + v.AddArg(mem) + return true + } return false } func rewriteValueAMD64_OpAMD64MOVWstoreconstidx1(v *Value, config *Config) bool { @@ -12595,31 +13226,67 @@ func rewriteValueAMD64_OpAddPtr(v *Value, config *Config) bool { b := v.Block _ = b // match: (AddPtr x y) - // cond: - // result: (ADDQ x y) + // cond: config.PtrSize == 8 + // result: (ADDQ x y) for { x := v.Args[0] y := v.Args[1] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64ADDQ) v.AddArg(x) v.AddArg(y) return true } + // match: (AddPtr x y) + // cond: config.PtrSize == 4 + // result: (ADDL x y) + for { + x := v.Args[0] + y := v.Args[1] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64ADDL) + v.AddArg(x) + v.AddArg(y) + return true + } + return false } func rewriteValueAMD64_OpAddr(v *Value, config *Config) bool { b := v.Block _ = b // match: (Addr {sym} base) - // cond: + // cond: config.PtrSize == 8 // result: (LEAQ {sym} base) for { sym := v.Aux base := v.Args[0] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64LEAQ) v.Aux = sym v.AddArg(base) return true } + // match: (Addr {sym} base) + // cond: config.PtrSize == 4 + // result: (LEAL {sym} base) + for { + sym := v.Aux + base := v.Args[0] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64LEAL) + v.Aux = sym + v.AddArg(base) + return true + } + return false } func rewriteValueAMD64_OpAnd16(v *Value, config *Config) bool { b := v.Block @@ -12903,30 +13570,65 @@ func rewriteValueAMD64_OpConstNil(v *Value, config *Config) bool { b := v.Block _ = b // match: (ConstNil) - // cond: + // cond: config.PtrSize == 8 // result: (MOVQconst [0]) for { + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64MOVQconst) v.AuxInt = 0 return true } + // match: (ConstNil) + // cond: config.PtrSize == 4 + // result: (MOVLconst [0]) + for { + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64MOVLconst) + v.AuxInt = 0 + return true + } + return false } func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool { b := v.Block _ = b // match: (Convert x mem) - // cond: + // cond: config.PtrSize == 8 // result: (MOVQconvert x mem) for { t := v.Type x := v.Args[0] mem := v.Args[1] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64MOVQconvert) v.Type = t v.AddArg(x) v.AddArg(mem) return true } + // match: (Convert x mem) + // cond: config.PtrSize == 4 + // result: (MOVLconvert x mem) + for { + t := v.Type + x := v.Args[0] + mem := v.Args[1] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64MOVLconvert) + v.Type = t + v.AddArg(x) + v.AddArg(mem) + return true + } + return false } func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool { b := v.Block @@ -13433,11 +14135,14 @@ func rewriteValueAMD64_OpEqPtr(v *Value, config *Config) bool { b := v.Block _ = b // match: (EqPtr x y) - // cond: + // cond: config.PtrSize == 8 // result: (SETEQ (CMPQ x y)) for { x := v.Args[0] y := v.Args[1] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64SETEQ) v0 := b.NewValue0(v.Line, OpAMD64CMPQ, TypeFlags) v0.AddArg(x) @@ -13445,6 +14150,23 @@ func rewriteValueAMD64_OpEqPtr(v *Value, config *Config) bool { v.AddArg(v0) return true } + // match: (EqPtr x y) + // cond: config.PtrSize == 4 + // result: (SETEQ (CMPL x y)) + for { + x := v.Args[0] + y := v.Args[1] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64SETEQ) + v0 := b.NewValue0(v.Line, OpAMD64CMPL, TypeFlags) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false } func rewriteValueAMD64_OpGeq16(v *Value, config *Config) bool { b := v.Block @@ -13945,6 +14667,20 @@ func rewriteValueAMD64_OpHmul8u(v *Value, config *Config) bool { return true } } +func rewriteValueAMD64_OpInt64Hi(v *Value, config *Config) bool { + b := v.Block + _ = b + // match: (Int64Hi x) + // cond: + // result: (SHRQconst [32] x) + for { + x := v.Args[0] + v.reset(OpAMD64SHRQconst) + v.AuxInt = 32 + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpInterCall(v *Value, config *Config) bool { b := v.Block _ = b @@ -13983,10 +14719,13 @@ func rewriteValueAMD64_OpIsNonNil(v *Value, config *Config) bool { b := v.Block _ = b // match: (IsNonNil p) - // cond: + // cond: config.PtrSize == 8 // result: (SETNE (TESTQ p p)) for { p := v.Args[0] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64SETNE) v0 := b.NewValue0(v.Line, OpAMD64TESTQ, TypeFlags) v0.AddArg(p) @@ -13994,6 +14733,22 @@ func rewriteValueAMD64_OpIsNonNil(v *Value, config *Config) bool { v.AddArg(v0) return true } + // match: (IsNonNil p) + // cond: config.PtrSize == 4 + // result: (SETNE (TESTL p p)) + for { + p := v.Args[0] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Line, OpAMD64TESTL, TypeFlags) + v0.AddArg(p) + v0.AddArg(p) + v.AddArg(v0) + return true + } + return false } func rewriteValueAMD64_OpIsSliceInBounds(v *Value, config *Config) bool { b := v.Block @@ -14356,13 +15111,13 @@ func rewriteValueAMD64_OpLoad(v *Value, config *Config) bool { b := v.Block _ = b // match: (Load ptr mem) - // cond: (is64BitInt(t) || isPtr(t)) + // cond: (is64BitInt(t) || isPtr(t) && config.PtrSize == 8) // result: (MOVQload ptr mem) for { t := v.Type ptr := v.Args[0] mem := v.Args[1] - if !(is64BitInt(t) || isPtr(t)) { + if !(is64BitInt(t) || isPtr(t) && config.PtrSize == 8) { break } v.reset(OpAMD64MOVQload) @@ -14371,13 +15126,13 @@ func rewriteValueAMD64_OpLoad(v *Value, config *Config) bool { return true } // match: (Load ptr mem) - // cond: is32BitInt(t) + // cond: (is32BitInt(t) || isPtr(t) && config.PtrSize == 4) // result: (MOVLload ptr mem) for { t := v.Type ptr := v.Args[0] mem := v.Args[1] - if !(is32BitInt(t)) { + if !(is32BitInt(t) || isPtr(t) && config.PtrSize == 4) { break } v.reset(OpAMD64MOVLload) @@ -15307,7 +16062,7 @@ func rewriteValueAMD64_OpMove(v *Value, config *Config) bool { } // match: (Move [s] dst src mem) // cond: SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 <= 8 - // result: (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16] (ADDQconst dst [SizeAndAlign(s).Size()%16]) (ADDQconst src [SizeAndAlign(s).Size()%16]) (MOVQstore dst (MOVQload src mem) mem)) + // result: (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16] (OffPtr dst [SizeAndAlign(s).Size()%16]) (OffPtr src [SizeAndAlign(s).Size()%16]) (MOVQstore dst (MOVQload src mem) mem)) for { s := v.AuxInt dst := v.Args[0] @@ -15318,11 +16073,11 @@ func rewriteValueAMD64_OpMove(v *Value, config *Config) bool { } v.reset(OpMove) v.AuxInt = SizeAndAlign(s).Size() - SizeAndAlign(s).Size()%16 - v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, dst.Type) + v0 := b.NewValue0(v.Line, OpOffPtr, dst.Type) v0.AuxInt = SizeAndAlign(s).Size() % 16 v0.AddArg(dst) v.AddArg(v0) - v1 := b.NewValue0(v.Line, OpAMD64ADDQconst, src.Type) + v1 := b.NewValue0(v.Line, OpOffPtr, src.Type) v1.AuxInt = SizeAndAlign(s).Size() % 16 v1.AddArg(src) v.AddArg(v1) @@ -15338,7 +16093,7 @@ func rewriteValueAMD64_OpMove(v *Value, config *Config) bool { } // match: (Move [s] dst src mem) // cond: SizeAndAlign(s).Size() > 16 && SizeAndAlign(s).Size()%16 != 0 && SizeAndAlign(s).Size()%16 > 8 - // result: (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16] (ADDQconst dst [SizeAndAlign(s).Size()%16]) (ADDQconst src [SizeAndAlign(s).Size()%16]) (MOVOstore dst (MOVOload src mem) mem)) + // result: (Move [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%16] (OffPtr dst [SizeAndAlign(s).Size()%16]) (OffPtr src [SizeAndAlign(s).Size()%16]) (MOVOstore dst (MOVOload src mem) mem)) for { s := v.AuxInt dst := v.Args[0] @@ -15349,11 +16104,11 @@ func rewriteValueAMD64_OpMove(v *Value, config *Config) bool { } v.reset(OpMove) v.AuxInt = SizeAndAlign(s).Size() - SizeAndAlign(s).Size()%16 - v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, dst.Type) + v0 := b.NewValue0(v.Line, OpOffPtr, dst.Type) v0.AuxInt = SizeAndAlign(s).Size() % 16 v0.AddArg(dst) v.AddArg(v0) - v1 := b.NewValue0(v.Line, OpAMD64ADDQconst, src.Type) + v1 := b.NewValue0(v.Line, OpOffPtr, src.Type) v1.AuxInt = SizeAndAlign(s).Size() % 16 v1.AddArg(src) v.AddArg(v1) @@ -15704,11 +16459,14 @@ func rewriteValueAMD64_OpNeqPtr(v *Value, config *Config) bool { b := v.Block _ = b // match: (NeqPtr x y) - // cond: + // cond: config.PtrSize == 8 // result: (SETNE (CMPQ x y)) for { x := v.Args[0] y := v.Args[1] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64SETNE) v0 := b.NewValue0(v.Line, OpAMD64CMPQ, TypeFlags) v0.AddArg(x) @@ -15716,6 +16474,23 @@ func rewriteValueAMD64_OpNeqPtr(v *Value, config *Config) bool { v.AddArg(v0) return true } + // match: (NeqPtr x y) + // cond: config.PtrSize == 4 + // result: (SETNE (CMPL x y)) + for { + x := v.Args[0] + y := v.Args[1] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64SETNE) + v0 := b.NewValue0(v.Line, OpAMD64CMPL, TypeFlags) + v0.AddArg(x) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false } func rewriteValueAMD64_OpNilCheck(v *Value, config *Config) bool { b := v.Block @@ -15750,12 +16525,12 @@ func rewriteValueAMD64_OpOffPtr(v *Value, config *Config) bool { b := v.Block _ = b // match: (OffPtr [off] ptr) - // cond: is32Bit(off) + // cond: config.PtrSize == 8 && is32Bit(off) // result: (ADDQconst [off] ptr) for { off := v.AuxInt ptr := v.Args[0] - if !(is32Bit(off)) { + if !(config.PtrSize == 8 && is32Bit(off)) { break } v.reset(OpAMD64ADDQconst) @@ -15764,11 +16539,14 @@ func rewriteValueAMD64_OpOffPtr(v *Value, config *Config) bool { return true } // match: (OffPtr [off] ptr) - // cond: + // cond: config.PtrSize == 8 // result: (ADDQ (MOVQconst [off]) ptr) for { off := v.AuxInt ptr := v.Args[0] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64ADDQ) v0 := b.NewValue0(v.Line, OpAMD64MOVQconst, config.fe.TypeUInt64()) v0.AuxInt = off @@ -15776,6 +16554,21 @@ func rewriteValueAMD64_OpOffPtr(v *Value, config *Config) bool { v.AddArg(ptr) return true } + // match: (OffPtr [off] ptr) + // cond: config.PtrSize == 4 + // result: (ADDLconst [off] ptr) + for { + off := v.AuxInt + ptr := v.Args[0] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64ADDLconst) + v.AuxInt = off + v.AddArg(ptr) + return true + } + return false } func rewriteValueAMD64_OpOr16(v *Value, config *Config) bool { b := v.Block @@ -16977,16 +17770,34 @@ func rewriteValueAMD64_OpSubPtr(v *Value, config *Config) bool { b := v.Block _ = b // match: (SubPtr x y) - // cond: - // result: (SUBQ x y) + // cond: config.PtrSize == 8 + // result: (SUBQ x y) for { x := v.Args[0] y := v.Args[1] + if !(config.PtrSize == 8) { + break + } v.reset(OpAMD64SUBQ) v.AddArg(x) v.AddArg(y) return true } + // match: (SubPtr x y) + // cond: config.PtrSize == 4 + // result: (SUBL x y) + for { + x := v.Args[0] + y := v.Args[1] + if !(config.PtrSize == 4) { + break + } + v.reset(OpAMD64SUBL) + v.AddArg(x) + v.AddArg(y) + return true + } + return false } func rewriteValueAMD64_OpTrunc16to8(v *Value, config *Config) bool { b := v.Block @@ -17295,7 +18106,7 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool { } // match: (Zero [s] destptr mem) // cond: SizeAndAlign(s).Size()%8 != 0 && SizeAndAlign(s).Size() > 8 - // result: (Zero [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8] (ADDQconst destptr [SizeAndAlign(s).Size()%8]) (MOVQstoreconst [0] destptr mem)) + // result: (Zero [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8] (OffPtr destptr [SizeAndAlign(s).Size()%8]) (MOVQstoreconst [0] destptr mem)) for { s := v.AuxInt destptr := v.Args[0] @@ -17305,7 +18116,7 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool { } v.reset(OpZero) v.AuxInt = SizeAndAlign(s).Size() - SizeAndAlign(s).Size()%8 - v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64()) + v0 := b.NewValue0(v.Line, OpOffPtr, destptr.Type) v0.AuxInt = SizeAndAlign(s).Size() % 8 v0.AddArg(destptr) v.AddArg(v0) @@ -17390,7 +18201,7 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool { } // match: (Zero [s] destptr mem) // cond: SizeAndAlign(s).Size() <= 1024 && SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size()%16 != 0 && !config.noDuffDevice - // result: (Zero [SizeAndAlign(s).Size()-8] (ADDQconst [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem)) + // result: (Zero [SizeAndAlign(s).Size()-8] (OffPtr [8] destptr) (MOVQstore destptr (MOVQconst [0]) mem)) for { s := v.AuxInt destptr := v.Args[0] @@ -17400,7 +18211,7 @@ func rewriteValueAMD64_OpZero(v *Value, config *Config) bool { } v.reset(OpZero) v.AuxInt = SizeAndAlign(s).Size() - 8 - v0 := b.NewValue0(v.Line, OpAMD64ADDQconst, config.fe.TypeUInt64()) + v0 := b.NewValue0(v.Line, OpOffPtr, destptr.Type) v0.AuxInt = 8 v0.AddArg(destptr) v.AddArg(v0) diff --git a/test/live.go b/test/live.go index f336ad783a..fac2ba8ade 100644 --- a/test/live.go +++ b/test/live.go @@ -1,4 +1,4 @@ -// +build !amd64,!arm +// +build !amd64,!arm,!amd64p32 // errorcheck -0 -l -live -wb=0 // Copyright 2014 The Go Authors. All rights reserved. diff --git a/test/live_ssa.go b/test/live_ssa.go index 35eb035fb1..43106db957 100644 --- a/test/live_ssa.go +++ b/test/live_ssa.go @@ -1,4 +1,4 @@ -// +build amd64 arm +// +build amd64 arm amd64p32 // errorcheck -0 -l -live -wb=0 // Copyright 2014 The Go Authors. All rights reserved. diff --git a/test/nilptr3.go b/test/nilptr3.go index 4615b90845..dfc50ca08f 100644 --- a/test/nilptr3.go +++ b/test/nilptr3.go @@ -2,7 +2,7 @@ // Fails on ppc64x because of incomplete optimization. // See issues 9058. // Same reason for mips64x and s390x. -// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm +// +build !ppc64,!ppc64le,!mips64,!mips64le,!amd64,!s390x,!arm,!amd64p32 // Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style diff --git a/test/nilptr3_ssa.go b/test/nilptr3_ssa.go index 39c102d51c..ac3e39674e 100644 --- a/test/nilptr3_ssa.go +++ b/test/nilptr3_ssa.go @@ -1,5 +1,5 @@ // errorcheck -0 -d=nil -// +build amd64 arm +// +build amd64 arm amd64p32 // Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style diff --git a/test/sliceopt.go b/test/sliceopt.go index d11c51eaf9..bba8619324 100644 --- a/test/sliceopt.go +++ b/test/sliceopt.go @@ -1,4 +1,4 @@ -// +build !amd64,!arm +// +build !amd64,!arm,!amd64p32 // errorcheck -0 -d=append,slice // Copyright 2015 The Go Authors. All rights reserved. -- 2.48.1