The SSA backend has rules to read the contents of readonly Lsyms.
However, these rules were failing to trigger for many readonly Lsyms.
This is because the readonly attribute set on the Node.Name
was not propagated to its Lsym until the dumpglobls phase,
which runs after SSA.
To work around this phase ordering problem, introduce Node.MarkReadonly,
which sets Node.Name.Readonly and also configures the Lsym
enough that SSA can use it.
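For reference, the SSA rewrite rules gate on the Lsym's type via
symIsRO in cmd/compile/internal/ssa/rewrite.go, which looks roughly
like this:

	// symIsRO reports whether sym is a read-only global
	// with no relocations.
	func symIsRO(sym interface{}) bool {
		lsym := sym.(*obj.LSym)
		return lsym.Type == objabi.SRODATA && len(lsym.R) == 0
	}

Before this change, lsym.Type was still unset when SSA ran,
so the check failed for these symbols.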
This change also switches the rewrite rules' read helpers
(read16, read32, read64) from a bigEndian bool to a binary.ByteOrder,
and fixes a latent problem in them: reads extending past the end
of lsym.P were treated as entirely zero, instead of being padded
with trailing zeros.
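For example, for a one-byte symbol whose contents are {0x2a},
a two-byte little-endian read at offset 0 should yield 0x002a,
the stored byte plus one zero byte of padding, but the old helpers
returned 0. The new padding logic, as implemented in read16 below:

	var src []byte
	if 0 <= off && off < int64(len(lsym.P)) {
		src = lsym.P[off:]
	}
	buf := make([]byte, 2)
	copy(buf, src) // buf stays zero-padded when src is short
	return byteorder.Uint16(buf)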
This change also adds an amd64 rule needed to fully optimize
the results of this change. It would be better not to need this,
but the zero extension that should handle this for us
gets optimized away too soon (see #36897 for a similar problem).
I have not investigated whether other platforms also need new
rules to take full advantage of the new optimizations.
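The new rule, as added to AMD64.rules below:

	(ADDQ x (MOVLconst [c])) && is32Bit(c) -> (ADDQconst [int64(int32(c))] x)

It folds a constant that arrives as a MOVLconst, such as a byte
loaded from a readonly symbol, into an ADDQconst.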
Compiled code for (interface{})(true) on amd64 goes from:

	LEAQ	type.bool(SB), AX
	MOVBLZX	""..stmp_0(SB), BX
	LEAQ	runtime.staticbytes(SB), CX
	ADDQ	CX, BX

to:

	LEAQ	type.bool(SB), AX
	LEAQ	runtime.staticbytes+1(SB), BX
Prior to this change, the readonly symbol rewrite rules
fired a total of 884 times during make.bash.
Afterwards, they fire 1807 times.
file       before      after       Δ        %
cgo        4827832     4823736     -4096    -0.085%
compile    24907768    24895656    -12112   -0.049%
fix        3376952     3368760     -8192    -0.243%
pprof      14751700    14747604    -4096    -0.028%
total      120343528   120315032   -28496   -0.024%
Change-Id: I59ea52138276c37840f69e30fb109fd376d579ec
Reviewed-on: https://go-review.googlesource.com/c/go/+/220499
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
n = defaultlit(n, nil)
dowidth(n.Type)
vstat := staticname(n.Type)
- vstat.Name.SetReadonly(true)
+ vstat.MarkReadonly()
var s InitSchedule
s.staticassign(vstat, n)
if s.out != nil {
var statuniqgen int // name generator for static temps
// staticname returns a name backed by a static data symbol.
-// Callers should call n.Name.SetReadonly(true) on the
+// Callers should call n.MarkReadonly on the
// returned node for readonly nodes.
func staticname(t *types.Type) *Node {
// Don't use lookupN; it interns the resulting string, but these are all unique.
if mode&initConst != 0 && !isSmallSliceLit(n) {
vstat = staticname(t)
if ctxt == inInitFunction {
- vstat.Name.SetReadonly(true)
+ vstat.MarkReadonly()
}
fixedlit(ctxt, initKindStatic, n, vstat, init)
}
// make and initialize static arrays
vstatk := staticname(tk)
- vstatk.Name.SetReadonly(true)
+ vstatk.MarkReadonly()
vstate := staticname(te)
- vstate.Name.SetReadonly(true)
+ vstate.MarkReadonly()
datak := nod(OARRAYLIT, nil, nil)
datae := nod(OARRAYLIT, nil, nil)
if var_.isSimpleName() && n.List.Len() > 4 {
// lay out static data
vstat := staticname(t)
- vstat.Name.SetReadonly(true)
+ vstat.MarkReadonly()
ctxt := inInitFunction
if n.Op == OARRAYLIT {
"cmd/compile/internal/syntax"
"cmd/compile/internal/types"
"cmd/internal/obj"
+ "cmd/internal/objabi"
"cmd/internal/src"
"sort"
)
func (n *Node) SetHasOpt(b bool) { n.flags.set(nodeHasOpt, b) }
func (n *Node) SetEmbedded(b bool) { n.flags.set(nodeEmbedded, b) }
+// MarkReadonly indicates that n is an ONAME with readonly contents.
+func (n *Node) MarkReadonly() {
+ if n.Op != ONAME {
+ Fatalf("Node.MarkReadonly %v", n.Op)
+ }
+ n.Name.SetReadonly(true)
+ // Mark the linksym as readonly immediately
+ // so that the SSA backend can use this information.
+ // It will be overridden later during dumpglobls.
+ n.Sym.Linksym().Type = objabi.SRODATA
+}
+
// Val returns the Val for the node.
func (n *Node) Val() Val {
if !n.HasVal() {
// n can be directly represented in the read-only data section.
// Make direct reference to the static data. See issue 12841.
vstat := staticname(n.Type)
- vstat.Name.SetReadonly(true)
+ vstat.MarkReadonly()
fixedlit(inInitFunction, initKindStatic, n, vstat, init)
n = vstat
n = typecheck(n, ctxExpr)
(CMPBload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(int64(int8(c)),off) -> (CMPBconstload {sym} [makeValAndOff(int64(int8(c)),off)] ptr mem)
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
-(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))])
-(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(int32(read32(sym, off, config.BigEndian)))])
+(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
+(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
// fold constants into instructions
(ADDQ x (MOVQconst [c])) && is32Bit(c) -> (ADDQconst [c] x)
+(ADDQ x (MOVLconst [c])) && is32Bit(c) -> (ADDQconst [int64(int32(c))] x)
(ADDL x (MOVLconst [c])) -> (ADDLconst [c] x)
(SUBQ x (MOVQconst [c])) && is32Bit(c) -> (SUBQconst x [c])
@l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff(0,off)] ptr mem)
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read8(sym, off))])
-(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.BigEndian))])
-(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.BigEndian))])
-(MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.BigEndian))])
+(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
+(MOVLload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read32(sym, off, config.ctxt.Arch.ByteOrder))])
+(MOVQload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
(GE (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 -> (GE (TEQshiftRAreg x y z) yes no)
(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read8(sym, off))])
-(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read16(sym, off, config.BigEndian))])
-(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(int32(read32(sym, off, config.BigEndian)))])
+(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
+(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
(FSUBD (FNMULD x y) a) -> (FNMADDD a x y)
(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read8(sym, off))])
-(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read16(sym, off, config.BigEndian))])
-(MOVWUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read32(sym, off, config.BigEndian))])
-(MOVDload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read64(sym, off, config.BigEndian))])
+(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
+(MOVWUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read32(sym, off, config.ctxt.Arch.ByteOrder))])
+(MOVDload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVDconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
(LoweredAddr {sym} [off+off2] base)
// transforming readonly globals into constants
-(I64Load [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read64(sym, off+off2, config.BigEndian))])
-(I64Load32U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read32(sym, off+off2, config.BigEndian))])
-(I64Load16U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read16(sym, off+off2, config.BigEndian))])
+(I64Load [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read64(sym, off+off2, config.ctxt.Arch.ByteOrder))])
+(I64Load32U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read32(sym, off+off2, config.ctxt.Arch.ByteOrder))])
+(I64Load16U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read16(sym, off+off2, config.ctxt.Arch.ByteOrder))])
(I64Load8U [off] (LoweredAddr {sym} [off2] (SB)) _) && symIsRO(sym) && isU32Bit(off+off2) -> (I64Const [int64(read8(sym, off+off2))])
}
// read16 reads two bytes from the read-only global sym at offset off.
-func read16(sym interface{}, off int64, bigEndian bool) uint16 {
+func read16(sym interface{}, off int64, byteorder binary.ByteOrder) uint16 {
lsym := sym.(*obj.LSym)
- if off >= int64(len(lsym.P))-1 || off < 0 {
- return 0
- }
- if bigEndian {
- return binary.BigEndian.Uint16(lsym.P[off:])
- } else {
- return binary.LittleEndian.Uint16(lsym.P[off:])
+ // lsym.P is written lazily.
+ // Bytes requested after the end of lsym.P are 0.
+ var src []byte
+ if 0 <= off && off < int64(len(lsym.P)) {
+ src = lsym.P[off:]
}
+ buf := make([]byte, 2)
+ copy(buf, src)
+ return byteorder.Uint16(buf)
}
// read32 reads four bytes from the read-only global sym at offset off.
-func read32(sym interface{}, off int64, bigEndian bool) uint32 {
+func read32(sym interface{}, off int64, byteorder binary.ByteOrder) uint32 {
lsym := sym.(*obj.LSym)
- if off >= int64(len(lsym.P))-3 || off < 0 {
- return 0
- }
- if bigEndian {
- return binary.BigEndian.Uint32(lsym.P[off:])
- } else {
- return binary.LittleEndian.Uint32(lsym.P[off:])
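+ // lsym.P is written lazily.
+ // Bytes requested after the end of lsym.P are 0.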
+ var src []byte
+ if 0 <= off && off < int64(len(lsym.P)) {
+ src = lsym.P[off:]
}
+ buf := make([]byte, 4)
+ copy(buf, src)
+ return byteorder.Uint32(buf)
}
// read64 reads eight bytes from the read-only global sym at offset off.
-func read64(sym interface{}, off int64, bigEndian bool) uint64 {
+func read64(sym interface{}, off int64, byteorder binary.ByteOrder) uint64 {
lsym := sym.(*obj.LSym)
- if off >= int64(len(lsym.P))-7 || off < 0 {
- return 0
- }
- if bigEndian {
- return binary.BigEndian.Uint64(lsym.P[off:])
- } else {
- return binary.LittleEndian.Uint64(lsym.P[off:])
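+ // lsym.P is written lazily.
+ // Bytes requested after the end of lsym.P are 0.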
+ var src []byte
+ if 0 <= off && off < int64(len(lsym.P)) {
+ src = lsym.P[off:]
}
+ buf := make([]byte, 8)
+ copy(buf, src)
+ return byteorder.Uint64(buf)
}
}
// match: (MOVLload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVLconst [int64(int32(read32(sym, off, config.BigEndian)))])
+ // result: (MOVLconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(Op386MOVLconst)
- v.AuxInt = int64(int32(read32(sym, off, config.BigEndian)))
+ v.AuxInt = int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))
return true
}
return false
}
// match: (MOVWload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVLconst [int64(read16(sym, off, config.BigEndian))])
+ // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(Op386MOVLconst)
- v.AuxInt = int64(read16(sym, off, config.BigEndian))
+ v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
break
}
+ // match: (ADDQ x (MOVLconst [c]))
+ // cond: is32Bit(c)
+ // result: (ADDQconst [int64(int32(c))] x)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ x := v_0
+ if v_1.Op != OpAMD64MOVLconst {
+ continue
+ }
+ c := v_1.AuxInt
+ if !(is32Bit(c)) {
+ continue
+ }
+ v.reset(OpAMD64ADDQconst)
+ v.AuxInt = int64(int32(c))
+ v.AddArg(x)
+ return true
+ }
+ break
+ }
// match: (ADDQ (SHLQconst x [c]) (SHRQconst x [d]))
// cond: d==64-c
// result: (ROLQconst x [c])
}
// match: (MOVLload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVQconst [int64(read32(sym, off, config.BigEndian))])
+ // result: (MOVQconst [int64(read32(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpAMD64MOVQconst)
- v.AuxInt = int64(read32(sym, off, config.BigEndian))
+ v.AuxInt = int64(read32(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (MOVQload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVQconst [int64(read64(sym, off, config.BigEndian))])
+ // result: (MOVQconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpAMD64MOVQconst)
- v.AuxInt = int64(read64(sym, off, config.BigEndian))
+ v.AuxInt = int64(read64(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (MOVWload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVLconst [int64(read16(sym, off, config.BigEndian))])
+ // result: (MOVLconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpAMD64MOVLconst)
- v.AuxInt = int64(read16(sym, off, config.BigEndian))
+ v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (MOVHUload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVWconst [int64(read16(sym, off, config.BigEndian))])
+ // result: (MOVWconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpARMMOVWconst)
- v.AuxInt = int64(read16(sym, off, config.BigEndian))
+ v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (MOVWload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVWconst [int64(int32(read32(sym, off, config.BigEndian)))])
+ // result: (MOVWconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpARMMOVWconst)
- v.AuxInt = int64(int32(read32(sym, off, config.BigEndian)))
+ v.AuxInt = int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))
return true
}
return false
}
// match: (MOVDload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVDconst [int64(read64(sym, off, config.BigEndian))])
+ // result: (MOVDconst [int64(read64(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpARM64MOVDconst)
- v.AuxInt = int64(read64(sym, off, config.BigEndian))
+ v.AuxInt = int64(read64(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (MOVHUload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVDconst [int64(read16(sym, off, config.BigEndian))])
+ // result: (MOVDconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpARM64MOVDconst)
- v.AuxInt = int64(read16(sym, off, config.BigEndian))
+ v.AuxInt = int64(read16(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (MOVWUload [off] {sym} (SB) _)
// cond: symIsRO(sym)
- // result: (MOVDconst [int64(read32(sym, off, config.BigEndian))])
+ // result: (MOVDconst [int64(read32(sym, off, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
sym := v.Aux
break
}
v.reset(OpARM64MOVDconst)
- v.AuxInt = int64(read32(sym, off, config.BigEndian))
+ v.AuxInt = int64(read32(sym, off, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (I64Load [off] (LoweredAddr {sym} [off2] (SB)) _)
// cond: symIsRO(sym) && isU32Bit(off+off2)
- // result: (I64Const [int64(read64(sym, off+off2, config.BigEndian))])
+ // result: (I64Const [int64(read64(sym, off+off2, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
if v_0.Op != OpWasmLoweredAddr {
break
}
v.reset(OpWasmI64Const)
- v.AuxInt = int64(read64(sym, off+off2, config.BigEndian))
+ v.AuxInt = int64(read64(sym, off+off2, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (I64Load16U [off] (LoweredAddr {sym} [off2] (SB)) _)
// cond: symIsRO(sym) && isU32Bit(off+off2)
- // result: (I64Const [int64(read16(sym, off+off2, config.BigEndian))])
+ // result: (I64Const [int64(read16(sym, off+off2, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
if v_0.Op != OpWasmLoweredAddr {
break
}
v.reset(OpWasmI64Const)
- v.AuxInt = int64(read16(sym, off+off2, config.BigEndian))
+ v.AuxInt = int64(read16(sym, off+off2, config.ctxt.Arch.ByteOrder))
return true
}
return false
}
// match: (I64Load32U [off] (LoweredAddr {sym} [off2] (SB)) _)
// cond: symIsRO(sym) && isU32Bit(off+off2)
- // result: (I64Const [int64(read32(sym, off+off2, config.BigEndian))])
+ // result: (I64Const [int64(read32(sym, off+off2, config.ctxt.Arch.ByteOrder))])
for {
off := v.AuxInt
if v_0.Op != OpWasmLoweredAddr {
break
}
v.reset(OpWasmI64Const)
- v.AuxInt = int64(read32(sym, off+off2, config.BigEndian))
+ v.AuxInt = int64(read32(sym, off+off2, config.ctxt.Arch.ByteOrder))
return true
}
return false