From a4650a2111b2bb826ca64a13bdad9c96e3095e47 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Sun, 10 Apr 2016 09:44:17 -0700 Subject: [PATCH] cmd/compile: avoid write barrier in append fast path MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When we are writing the result of an append back to the same slice, we don’t need a write barrier on the fast path. This re-implements an optimization that was present in the old backend. Updates #14921 Fixes #14969 Sample code: var x []byte func p() { x = append(x, 1, 2, 3) } Before: "".p t=1 size=224 args=0x0 locals=0x48 0x0000 00000 (append.go:21) TEXT "".p(SB), $72-0 0x0000 00000 (append.go:21) MOVQ (TLS), CX 0x0009 00009 (append.go:21) CMPQ SP, 16(CX) 0x000d 00013 (append.go:21) JLS 199 0x0013 00019 (append.go:21) SUBQ $72, SP 0x0017 00023 (append.go:21) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:21) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:19) MOVQ "".x+16(SB), CX 0x001e 00030 (append.go:19) MOVQ "".x(SB), DX 0x0025 00037 (append.go:19) MOVQ "".x+8(SB), BX 0x002c 00044 (append.go:19) MOVQ BX, "".autotmp_0+64(SP) 0x0031 00049 (append.go:22) LEAQ 3(BX), BP 0x0035 00053 (append.go:22) CMPQ BP, CX 0x0038 00056 (append.go:22) JGT $0, 131 0x003a 00058 (append.go:22) MOVB $1, (DX)(BX*1) 0x003e 00062 (append.go:22) MOVB $2, 1(DX)(BX*1) 0x0043 00067 (append.go:22) MOVB $3, 2(DX)(BX*1) 0x0048 00072 (append.go:22) MOVQ BP, "".x+8(SB) 0x004f 00079 (append.go:22) MOVQ CX, "".x+16(SB) 0x0056 00086 (append.go:22) MOVL runtime.writeBarrier(SB), AX 0x005c 00092 (append.go:22) TESTB AL, AL 0x005e 00094 (append.go:22) JNE $0, 108 0x0060 00096 (append.go:22) MOVQ DX, "".x(SB) 0x0067 00103 (append.go:23) ADDQ $72, SP 0x006b 00107 (append.go:23) RET 0x006c 00108 (append.go:22) LEAQ "".x(SB), CX 0x0073 00115 (append.go:22) MOVQ CX, (SP) 0x0077 00119 (append.go:22) MOVQ DX, 8(SP) 0x007c 00124 (append.go:22) PCDATA $0, $0 0x007c 00124 (append.go:22) CALL runtime.writebarrierptr(SB) 0x0081 00129 (append.go:23) JMP 103 0x0083 00131 (append.go:22) LEAQ type.[]uint8(SB), AX 0x008a 00138 (append.go:22) MOVQ AX, (SP) 0x008e 00142 (append.go:22) MOVQ DX, 8(SP) 0x0093 00147 (append.go:22) MOVQ BX, 16(SP) 0x0098 00152 (append.go:22) MOVQ CX, 24(SP) 0x009d 00157 (append.go:22) MOVQ BP, 32(SP) 0x00a2 00162 (append.go:22) PCDATA $0, $0 0x00a2 00162 (append.go:22) CALL runtime.growslice(SB) 0x00a7 00167 (append.go:22) MOVQ 40(SP), DX 0x00ac 00172 (append.go:22) MOVQ 48(SP), AX 0x00b1 00177 (append.go:22) MOVQ 56(SP), CX 0x00b6 00182 (append.go:22) ADDQ $3, AX 0x00ba 00186 (append.go:19) MOVQ "".autotmp_0+64(SP), BX 0x00bf 00191 (append.go:22) MOVQ AX, BP 0x00c2 00194 (append.go:22) JMP 58 0x00c7 00199 (append.go:22) NOP 0x00c7 00199 (append.go:21) CALL runtime.morestack_noctxt(SB) 0x00cc 00204 (append.go:21) JMP 0 After: "".p t=1 size=208 args=0x0 locals=0x48 0x0000 00000 (append.go:21) TEXT "".p(SB), $72-0 0x0000 00000 (append.go:21) MOVQ (TLS), CX 0x0009 00009 (append.go:21) CMPQ SP, 16(CX) 0x000d 00013 (append.go:21) JLS 191 0x0013 00019 (append.go:21) SUBQ $72, SP 0x0017 00023 (append.go:21) FUNCDATA $0, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:21) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x0017 00023 (append.go:19) MOVQ "".x+16(SB), CX 0x001e 00030 (append.go:19) MOVQ "".x+8(SB), DX 0x0025 00037 (append.go:19) MOVQ DX, "".autotmp_0+64(SP) 0x002a 00042 (append.go:19) MOVQ "".x(SB), BX 0x0031 00049 (append.go:22) LEAQ 3(DX), BP 0x0035 00053 (append.go:22) MOVQ BP, "".x+8(SB) 0x003c 00060 (append.go:22) CMPQ BP, CX 0x003f 00063 (append.go:22) JGT $0, 84 0x0041 00065 (append.go:22) MOVB $1, (BX)(DX*1) 0x0045 00069 (append.go:22) MOVB $2, 1(BX)(DX*1) 0x004a 00074 (append.go:22) MOVB $3, 2(BX)(DX*1) 0x004f 00079 (append.go:23) ADDQ $72, SP 0x0053 00083 (append.go:23) RET 0x0054 00084 (append.go:22) LEAQ type.[]uint8(SB), AX 0x005b 00091 (append.go:22) MOVQ AX, (SP) 0x005f 00095 (append.go:22) MOVQ BX, 8(SP) 0x0064 00100 (append.go:22) MOVQ DX, 16(SP) 0x0069 00105 (append.go:22) MOVQ CX, 24(SP) 0x006e 00110 (append.go:22) MOVQ BP, 32(SP) 0x0073 00115 (append.go:22) PCDATA $0, $0 0x0073 00115 (append.go:22) CALL runtime.growslice(SB) 0x0078 00120 (append.go:22) MOVQ 40(SP), CX 0x007d 00125 (append.go:22) MOVQ 56(SP), AX 0x0082 00130 (append.go:22) MOVQ AX, "".x+16(SB) 0x0089 00137 (append.go:22) MOVL runtime.writeBarrier(SB), AX 0x008f 00143 (append.go:22) TESTB AL, AL 0x0091 00145 (append.go:22) JNE $0, 168 0x0093 00147 (append.go:22) MOVQ CX, "".x(SB) 0x009a 00154 (append.go:22) MOVQ "".x(SB), BX 0x00a1 00161 (append.go:19) MOVQ "".autotmp_0+64(SP), DX 0x00a6 00166 (append.go:22) JMP 65 0x00a8 00168 (append.go:22) LEAQ "".x(SB), DX 0x00af 00175 (append.go:22) MOVQ DX, (SP) 0x00b3 00179 (append.go:22) MOVQ CX, 8(SP) 0x00b8 00184 (append.go:22) PCDATA $0, $0 0x00b8 00184 (append.go:22) CALL runtime.writebarrierptr(SB) 0x00bd 00189 (append.go:22) JMP 154 0x00bf 00191 (append.go:22) NOP 0x00bf 00191 (append.go:21) CALL runtime.morestack_noctxt(SB) 0x00c4 00196 (append.go:21) JMP 0 Change-Id: I77a41ad3a22557a4bb4654de7d6d24a029efe34a Reviewed-on: https://go-review.googlesource.com/21813 Run-TryBot: Josh Bleecher Snyder TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- src/cmd/compile/internal/gc/ssa.go | 123 ++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 27 deletions(-) diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go index 5ee370395b..beb68b0385 100644 --- a/src/cmd/compile/internal/gc/ssa.go +++ b/src/cmd/compile/internal/gc/ssa.go @@ -683,14 +683,27 @@ func (s *state) stmt(n *Node) { // Evaluate RHS. rhs := n.Right - if rhs != nil && (rhs.Op == OSTRUCTLIT || rhs.Op == OARRAYLIT) { - // All literals with nonzero fields have already been - // rewritten during walk. Any that remain are just T{} - // or equivalents. Use the zero value. - if !iszero(rhs) { - Fatalf("literal with nonzero value in SSA: %v", rhs) + if rhs != nil { + switch rhs.Op { + case OSTRUCTLIT, OARRAYLIT: + // All literals with nonzero fields have already been + // rewritten during walk. Any that remain are just T{} + // or equivalents. Use the zero value. + if !iszero(rhs) { + Fatalf("literal with nonzero value in SSA: %v", rhs) + } + rhs = nil + case OAPPEND: + // If we're writing the result of an append back to the same slice, + // handle it specially to avoid write barriers on the fast (non-growth) path. + // If the slice can be SSA'd, it'll be on the stack, + // so there will be no write barriers, + // so there's no need to attempt to prevent them. + if samesafeexpr(n.Left, rhs.List.First()) && !s.canSSA(n.Left) { + s.append(rhs, true) + return + } } - rhs = nil } var r *ssa.Value needwb := n.Op == OASWB && rhs != nil @@ -709,11 +722,11 @@ func (s *state) stmt(n *Node) { } } if rhs != nil && rhs.Op == OAPPEND { - // Yuck! The frontend gets rid of the write barrier, but we need it! - // At least, we need it in the case where growslice is called. - // TODO: Do the write barrier on just the growslice branch. + // The frontend gets rid of the write barrier to enable the special OAPPEND + // handling above, but since this is not a special case, we need it. // TODO: just add a ptr graying to the end of growslice? - // TODO: check whether we need to do this for ODOTTYPE and ORECV also. + // TODO: check whether we need to provide special handling and a write barrier + // for ODOTTYPE and ORECV also. // They get similar wb-removal treatment in walk.go:OAS. needwb = true } @@ -2079,7 +2092,7 @@ func (s *state) expr(n *Node) *ssa.Value { return s.newValue1(ssa.OpGetG, n.Type, s.mem()) case OAPPEND: - return s.exprAppend(n) + return s.append(n, false) default: s.Unimplementedf("unhandled expr %s", opnames[n.Op]) @@ -2087,25 +2100,57 @@ func (s *state) expr(n *Node) *ssa.Value { } } -// exprAppend converts an OAPPEND node n to an ssa.Value, adds it to s, and returns the Value. -func (s *state) exprAppend(n *Node) *ssa.Value { - // append(s, e1, e2, e3). Compile like: +// append converts an OAPPEND node to SSA. +// If inplace is false, it converts the OAPPEND expression n to an ssa.Value, +// adds it to s, and returns the Value. +// If inplace is true, it writes the result of the OAPPEND expression n +// back to the slice being appended to, and returns nil. +// inplace MUST be set to false if the slice can be SSA'd. +func (s *state) append(n *Node, inplace bool) *ssa.Value { + // If inplace is false, process as expression "append(s, e1, e2, e3)": + // // ptr, len, cap := s // newlen := len + 3 - // if newlen > s.cap { + // if newlen > cap { // ptr, len, cap = growslice(s, newlen) // newlen = len + 3 // recalculate to avoid a spill // } + // // with write barriers, if needed: + // *(ptr+len) = e1 + // *(ptr+len+1) = e2 + // *(ptr+len+2) = e3 + // return makeslice(ptr, newlen, cap) + // + // + // If inplace is true, process as statement "s = append(s, e1, e2, e3)": + // + // a := &s + // ptr, len, cap := s + // newlen := len + 3 + // *a.len = newlen // store newlen immediately to avoid a spill + // if newlen > cap { + // newptr, _, newcap = growslice(ptr, len, cap, newlen) + // *a.cap = newcap // write before ptr to avoid a spill + // *a.ptr = newptr // with write barrier + // } + // // with write barriers, if needed: // *(ptr+len) = e1 // *(ptr+len+1) = e2 // *(ptr+len+2) = e3 - // makeslice(ptr, newlen, cap) et := n.Type.Elem() pt := Ptrto(et) // Evaluate slice - slice := s.expr(n.List.First()) + sn := n.List.First() // the slice node is the first in the list + + var slice, addr *ssa.Value + if inplace { + addr = s.addr(sn, false) + slice = s.newValue2(ssa.OpLoad, n.Type, addr, s.mem()) + } else { + slice = s.expr(sn) + } // Allocate new blocks grow := s.f.NewBlock(ssa.BlockPlain) @@ -2117,10 +2162,20 @@ func (s *state) exprAppend(n *Node) *ssa.Value { l := s.newValue1(ssa.OpSliceLen, Types[TINT], slice) c := s.newValue1(ssa.OpSliceCap, Types[TINT], slice) nl := s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], l, s.constInt(Types[TINT], nargs)) + + if inplace { + lenaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_nel), addr) + s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, lenaddr, nl, s.mem()) + } + cmp := s.newValue2(s.ssaOp(OGT, Types[TINT]), Types[TBOOL], nl, c) s.vars[&ptrVar] = p - s.vars[&newlenVar] = nl - s.vars[&capVar] = c + + if !inplace { + s.vars[&newlenVar] = nl + s.vars[&capVar] = c + } + b := s.endBlock() b.Kind = ssa.BlockIf b.Likely = ssa.BranchUnlikely @@ -2134,9 +2189,18 @@ func (s *state) exprAppend(n *Node) *ssa.Value { r := s.rtcall(growslice, true, []*Type{pt, Types[TINT], Types[TINT]}, taddr, p, l, c, nl) - s.vars[&ptrVar] = r[0] - s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs)) - s.vars[&capVar] = r[2] + if inplace { + capaddr := s.newValue1I(ssa.OpOffPtr, pt, int64(Array_cap), addr) + s.vars[&memVar] = s.newValue3I(ssa.OpStore, ssa.TypeMem, s.config.IntSize, capaddr, r[2], s.mem()) + s.insertWBstore(pt, addr, r[0], n.Lineno, 0) + // load the value we just stored to avoid having to spill it + s.vars[&ptrVar] = s.newValue2(ssa.OpLoad, pt, addr, s.mem()) + } else { + s.vars[&ptrVar] = r[0] + s.vars[&newlenVar] = s.newValue2(s.ssaOp(OADD, Types[TINT]), Types[TINT], r[1], s.constInt(Types[TINT], nargs)) + s.vars[&capVar] = r[2] + } + b = s.endBlock() b.AddEdgeTo(assign) @@ -2156,9 +2220,11 @@ func (s *state) exprAppend(n *Node) *ssa.Value { } } - p = s.variable(&ptrVar, pt) // generates phi for ptr - nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl - c = s.variable(&capVar, Types[TINT]) // generates phi for cap + p = s.variable(&ptrVar, pt) // generates phi for ptr + if !inplace { + nl = s.variable(&newlenVar, Types[TINT]) // generates phi for nl + c = s.variable(&capVar, Types[TINT]) // generates phi for cap + } p2 := s.newValue2(ssa.OpPtrIndex, pt, p, l) // TODO: just one write barrier call for all of these writes? // TODO: maybe just one writeBarrier.enabled check? @@ -2179,10 +2245,13 @@ func (s *state) exprAppend(n *Node) *ssa.Value { } } - // make result delete(s.vars, &ptrVar) + if inplace { + return nil + } delete(s.vars, &newlenVar) delete(s.vars, &capVar) + // make result return s.newValue3(ssa.OpSliceMake, n.Type, p, nl, c) } -- 2.48.1