From 79561a84ceb4435c1294767d26b0b8a0dd77809d Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 20 Oct 2016 22:45:18 -0400 Subject: [PATCH] runtime: simplify reflectcall write barriers Currently reflectcall has a subtle dance with write barriers where the assembly code copies the result values from the stack to the in-heap argument frame without write barriers and then calls into the runtime after the fact to invoke the necessary write barriers. For the hybrid barrier (and for ROC), we need to switch to a *pre*-write write barrier, which is very difficult to do with the current setup. We could tie ourselves in knots of subtle reasoning about why it's okay in this particular case to have a post-write write barrier, but this commit instead takes a different approach. Rather than making things more complex, this simplifies reflection calls so that the argument copy is done in Go using normal bulk write barriers. The one difficulty with this approach is that calling into Go requires putting arguments on the stack, but the call* functions "donate" their entire stack frame to the called function. We can get away with this now because the copy avoids using the stack and has copied the results out before we clobber the stack frame to call into the write barrier. The solution in this CL is to call another function, passing arguments in registers instead of on the stack, and let that other function reserve more stack space and setup the arguments for the runtime. This approach seemed to work out the best. I also tried making the call* functions reserve 32 extra bytes of frame for the write barrier arguments and adjust SP up by 32 bytes around the call. However, even with the necessary changes to the assembler to correct the spdelta table, the runtime was still having trouble with the frame layout (and the changes to the assembler caused many other things that do strange things with the SP to fail to assemble). The approach I took doesn't require any funny business with the SP. Updates #17503. Change-Id: Ie2bb0084b24d6cff38b5afb218b9e0534ad2119e Reviewed-on: https://go-review.googlesource.com/31655 Run-TryBot: Austin Clements Reviewed-by: Cherry Zhang --- src/reflect/value.go | 2 +- src/runtime/asm_386.s | 25 ++++++++++++++----------- src/runtime/asm_amd64.s | 30 ++++++++++++++++-------------- src/runtime/asm_amd64p32.s | 25 ++++++++++++++----------- src/runtime/asm_arm.s | 32 ++++++++++++++------------------ src/runtime/asm_arm64.s | 38 +++++++++++++++----------------------- src/runtime/asm_mips64x.s | 38 +++++++++++++++----------------------- src/runtime/asm_ppc64x.s | 38 +++++++++++++++----------------------- src/runtime/asm_s390x.s | 37 ++++++++++++++----------------------- src/runtime/mbarrier.go | 24 +++++++++++++----------- 10 files changed, 131 insertions(+), 158 deletions(-) diff --git a/src/reflect/value.go b/src/reflect/value.go index f9080abff4..fa1b3e3b51 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -447,7 +447,7 @@ func (v Value) call(op string, in []Value) []Value { // because the Values returned by this function contain pointers to the args object, // and will thus keep the args object alive indefinitely. memclr(args, retOffset) - // Copy return values out of args. + // Wrap Values around return values in args. ret = make([]Value, nout) off = retOffset for i := 0; i < nout; i++ { diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s index 67b4cab77e..68d1e51265 100644 --- a/src/runtime/asm_386.s +++ b/src/runtime/asm_386.s @@ -477,6 +477,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \ PCDATA $PCDATA_StackMapIndex, $0; \ CALL AX; \ /* copy return values back */ \ + MOVL argtype+0(FP), DX; \ MOVL argptr+8(FP), DI; \ MOVL argsize+12(FP), CX; \ MOVL retoffset+16(FP), BX; \ @@ -484,17 +485,19 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \ ADDL BX, DI; \ ADDL BX, SI; \ SUBL BX, CX; \ - REP;MOVSB; \ - /* execute write barrier updates */ \ - MOVL argtype+0(FP), DX; \ - MOVL argptr+8(FP), DI; \ - MOVL argsize+12(FP), CX; \ - MOVL retoffset+16(FP), BX; \ - MOVL DX, 0(SP); \ - MOVL DI, 4(SP); \ - MOVL CX, 8(SP); \ - MOVL BX, 12(SP); \ - CALL runtime·callwritebarrier(SB); \ + CALL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $16-0 + MOVL DX, 0(SP) + MOVL DI, 4(SP) + MOVL SI, 8(SP) + MOVL CX, 12(SP) + CALL runtime·reflectcallmove(SB) RET CALLFN(·call16, 16) diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s index 398b14888f..bcc9cad655 100644 --- a/src/runtime/asm_amd64.s +++ b/src/runtime/asm_amd64.s @@ -416,8 +416,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0 TEXT ·reflectcall(SB), NOSPLIT, $0-32 MOVLQZX argsize+24(FP), CX - // NOTE(rsc): No call16, because CALLFN needs four words - // of argument space to invoke callwritebarrier. DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -460,24 +458,28 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-32; \ PCDATA $PCDATA_StackMapIndex, $0; \ CALL (DX); \ /* copy return values back */ \ + MOVQ argtype+0(FP), DX; \ MOVQ argptr+16(FP), DI; \ MOVLQZX argsize+24(FP), CX; \ - MOVLQZX retoffset+28(FP), BX; \ + MOVLQZX retoffset+28(FP), BX; \ MOVQ SP, SI; \ ADDQ BX, DI; \ ADDQ BX, SI; \ SUBQ BX, CX; \ - REP;MOVSB; \ - /* execute write barrier updates */ \ - MOVQ argtype+0(FP), DX; \ - MOVQ argptr+16(FP), DI; \ - MOVLQZX argsize+24(FP), CX; \ - MOVLQZX retoffset+28(FP), BX; \ - MOVQ DX, 0(SP); \ - MOVQ DI, 8(SP); \ - MOVQ CX, 16(SP); \ - MOVQ BX, 24(SP); \ - CALL runtime·callwritebarrier(SB); \ + CALL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $32-0 + NO_LOCAL_POINTERS + MOVQ DX, 0(SP) + MOVQ DI, 8(SP) + MOVQ SI, 16(SP) + MOVQ CX, 24(SP) + CALL runtime·reflectcallmove(SB) RET CALLFN(·call32, 32) diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s index fab6c0db5d..60613b175d 100644 --- a/src/runtime/asm_amd64p32.s +++ b/src/runtime/asm_amd64p32.s @@ -371,6 +371,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \ MOVL (DX), AX; \ CALL AX; \ /* copy return values back */ \ + MOVL argtype+0(FP), DX; \ MOVL argptr+8(FP), DI; \ MOVL argsize+12(FP), CX; \ MOVL retoffset+16(FP), BX; \ @@ -378,17 +379,19 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \ ADDL BX, DI; \ ADDL BX, SI; \ SUBL BX, CX; \ - REP;MOVSB; \ - /* execute write barrier updates */ \ - MOVL argtype+0(FP), DX; \ - MOVL argptr+8(FP), DI; \ - MOVL argsize+12(FP), CX; \ - MOVL retoffset+16(FP), BX; \ - MOVL DX, 0(SP); \ - MOVL DI, 4(SP); \ - MOVL CX, 8(SP); \ - MOVL BX, 12(SP); \ - CALL runtime·callwritebarrier(SB); \ + CALL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $16-0 + MOVL DX, 0(SP) + MOVL DI, 4(SP) + MOVL SI, 8(SP) + MOVL CX, 12(SP) + CALL runtime·reflectcallmove(SB) RET CALLFN(·call16, 16) diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s index 3bfa250c99..aa7b74827a 100644 --- a/src/runtime/asm_arm.s +++ b/src/runtime/asm_arm.s @@ -406,6 +406,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \ PCDATA $PCDATA_StackMapIndex, $0; \ BL (R0); \ /* copy return values back */ \ + MOVW argtype+0(FP), R4; \ MOVW argptr+8(FP), R0; \ MOVW argsize+12(FP), R2; \ MOVW retoffset+16(FP), R3; \ @@ -413,24 +414,19 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-20; \ ADD R3, R1; \ ADD R3, R0; \ SUB R3, R2; \ -loop: \ - CMP $0, R2; \ - B.EQ end; \ - MOVBU.P 1(R1), R5; \ - MOVBU.P R5, 1(R0); \ - SUB $1, R2, R2; \ - B loop; \ -end: \ - /* execute write barrier updates */ \ - MOVW argtype+0(FP), R1; \ - MOVW argptr+8(FP), R0; \ - MOVW argsize+12(FP), R2; \ - MOVW retoffset+16(FP), R3; \ - MOVW R1, 4(R13); \ - MOVW R0, 8(R13); \ - MOVW R2, 12(R13); \ - MOVW R3, 16(R13); \ - BL runtime·callwritebarrier(SB); \ + BL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $16-0 + MOVW R4, 4(R13) + MOVW R0, 8(R13) + MOVW R1, 12(R13) + MOVW R2, 16(R13) + BL runtime·reflectcallmove(SB) RET CALLFN(·call16, 16) diff --git a/src/runtime/asm_arm64.s b/src/runtime/asm_arm64.s index 2d73052c23..675abb51d3 100644 --- a/src/runtime/asm_arm64.s +++ b/src/runtime/asm_arm64.s @@ -335,8 +335,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0 TEXT ·reflectcall(SB), NOSPLIT, $-8-32 MOVWU argsize+24(FP), R16 - // NOTE(rsc): No call16, because CALLFN needs four words - // of argument space to invoke callwritebarrier. DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -387,33 +385,27 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ PCDATA $PCDATA_StackMapIndex, $0; \ BL (R0); \ /* copy return values back */ \ + MOVD argtype+0(FP), R7; \ MOVD arg+16(FP), R3; \ MOVWU n+24(FP), R4; \ MOVWU retoffset+28(FP), R6; \ - MOVD RSP, R5; \ + ADD $8, RSP, R5; \ ADD R6, R5; \ ADD R6, R3; \ SUB R6, R4; \ - ADD $(8-1), R5; \ - SUB $1, R3; \ - ADD R5, R4; \ -loop: \ - CMP R5, R4; \ - BEQ end; \ - MOVBU.W 1(R5), R6; \ - MOVBU.W R6, 1(R3); \ - B loop; \ -end: \ - /* execute write barrier updates */ \ - MOVD argtype+0(FP), R7; \ - MOVD arg+16(FP), R3; \ - MOVWU n+24(FP), R4; \ - MOVWU retoffset+28(FP), R6; \ - MOVD R7, 8(RSP); \ - MOVD R3, 16(RSP); \ - MOVD R4, 24(RSP); \ - MOVD R6, 32(RSP); \ - BL runtime·callwritebarrier(SB); \ + BL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $40-0 + MOVD R7, 8(RSP) + MOVD R3, 16(RSP) + MOVD R5, 24(RSP) + MOVD R4, 32(RSP) + BL runtime·reflectcallmove(SB) RET // These have 8 added to make the overall frame size a multiple of 16, diff --git a/src/runtime/asm_mips64x.s b/src/runtime/asm_mips64x.s index 79378df22c..4666741f28 100644 --- a/src/runtime/asm_mips64x.s +++ b/src/runtime/asm_mips64x.s @@ -309,8 +309,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0 TEXT ·reflectcall(SB), NOSPLIT, $-8-32 MOVWU argsize+24(FP), R1 - // NOTE(rsc): No call16, because CALLFN needs four words - // of argument space to invoke callwritebarrier. DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -361,33 +359,27 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ PCDATA $PCDATA_StackMapIndex, $0; \ JAL (R4); \ /* copy return values back */ \ + MOVV argtype+0(FP), R5; \ MOVV arg+16(FP), R1; \ MOVWU n+24(FP), R2; \ MOVWU retoffset+28(FP), R4; \ - MOVV R29, R3; \ + ADDV $8, R29, R3; \ ADDV R4, R3; \ ADDV R4, R1; \ SUBVU R4, R2; \ - ADDV $8, R3; \ - ADDV R3, R2; \ -loop: \ - BEQ R3, R2, end; \ - MOVBU (R3), R4; \ - ADDV $1, R3; \ - MOVBU R4, (R1); \ - ADDV $1, R1; \ - JMP loop; \ -end: \ - /* execute write barrier updates */ \ - MOVV argtype+0(FP), R5; \ - MOVV arg+16(FP), R1; \ - MOVWU n+24(FP), R2; \ - MOVWU retoffset+28(FP), R4; \ - MOVV R5, 8(R29); \ - MOVV R1, 16(R29); \ - MOVV R2, 24(R29); \ - MOVV R4, 32(R29); \ - JAL runtime·callwritebarrier(SB); \ + JAL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $32-0 + MOVV R5, 8(R29) + MOVV R1, 16(R29) + MOVV R3, 24(R29) + MOVV R2, 32(R29) + JAL runtime·reflectcallmove(SB) RET CALLFN(·call16, 16) diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s index 1ce7b2d903..7571116957 100644 --- a/src/runtime/asm_ppc64x.s +++ b/src/runtime/asm_ppc64x.s @@ -363,8 +363,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0 TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32 MOVWZ argsize+24(FP), R3 - // NOTE(rsc): No call16, because CALLFN needs four words - // of argument space to invoke callwritebarrier. DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -418,33 +416,27 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ BL (CTR); \ MOVD 24(R1), R2; \ /* copy return values back */ \ + MOVD argtype+0(FP), R7; \ MOVD arg+16(FP), R3; \ MOVWZ n+24(FP), R4; \ MOVWZ retoffset+28(FP), R6; \ - MOVD R1, R5; \ + ADD $FIXED_FRAME, R1, R5; \ ADD R6, R5; \ ADD R6, R3; \ SUB R6, R4; \ - ADD $(FIXED_FRAME-1), R5; \ - SUB $1, R3; \ - ADD R5, R4; \ -loop: \ - CMP R5, R4; \ - BEQ end; \ - MOVBZU 1(R5), R6; \ - MOVBZU R6, 1(R3); \ - BR loop; \ -end: \ - /* execute write barrier updates */ \ - MOVD argtype+0(FP), R7; \ - MOVD arg+16(FP), R3; \ - MOVWZ n+24(FP), R4; \ - MOVWZ retoffset+28(FP), R6; \ - MOVD R7, FIXED_FRAME+0(R1); \ - MOVD R3, FIXED_FRAME+8(R1); \ - MOVD R4, FIXED_FRAME+16(R1); \ - MOVD R6, FIXED_FRAME+24(R1); \ - BL runtime·callwritebarrier(SB); \ + BL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $32-0 + MOVD R7, FIXED_FRAME+0(R1) + MOVD R3, FIXED_FRAME+8(R1) + MOVD R5, FIXED_FRAME+16(R1) + MOVD R4, FIXED_FRAME+24(R1) + BL runtime·reflectcallmove(SB) RET CALLFN(·call32, 32) diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s index 36fe56f60b..1dcee7cf4e 100644 --- a/src/runtime/asm_s390x.s +++ b/src/runtime/asm_s390x.s @@ -320,8 +320,6 @@ TEXT reflect·call(SB), NOSPLIT, $0-0 TEXT ·reflectcall(SB), NOSPLIT, $-8-32 MOVWZ argsize+24(FP), R3 - // NOTE(rsc): No call16, because CALLFN needs four words - // of argument space to invoke callwritebarrier. DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) DISPATCH(runtime·call128, 128) @@ -377,6 +375,7 @@ callFunction: \ PCDATA $PCDATA_StackMapIndex, $0; \ BL (R8); \ /* copy return values back */ \ + MOVD argtype+0(FP), R7; \ MOVD arg+16(FP), R6; \ MOVWZ n+24(FP), R5; \ MOVD $stack-MAXSIZE(SP), R4; \ @@ -384,27 +383,19 @@ callFunction: \ ADD R1, R4; \ ADD R1, R6; \ SUB R1, R5; \ -loopRets: /* copy 256 bytes at a time */ \ - CMP R5, $256; \ - BLT tailRets; \ - SUB $256, R5; \ - MVC $256, 0(R4), 0(R6); \ - MOVD $256(R4), R4; \ - MOVD $256(R6), R6; \ - BR loopRets; \ -tailRets: /* copy remaining bytes */ \ - CMP R5, $0; \ - BEQ writeBarrierUpdates; \ - SUB $1, R5; \ - EXRL $callfnMVC<>(SB), R5; \ -writeBarrierUpdates: \ - /* execute write barrier updates */ \ - MOVD argtype+0(FP), R1; \ - MOVD arg+16(FP), R2; \ - MOVWZ n+24(FP), R3; \ - MOVWZ retoffset+28(FP), R4; \ - STMG R1, R4, stack-MAXSIZE(SP); \ - BL runtime·callwritebarrier(SB); \ + BL callRet<>(SB); \ + RET + +// callRet copies return values back at the end of call*. This is a +// separate function so it can allocate stack space for the arguments +// to reflectcallmove. It does not follow the Go ABI; it expects its +// arguments in registers. +TEXT callRet<>(SB), NOSPLIT, $32-0 + MOVD R7, 8(R15) + MOVD R6, 16(R15) + MOVD R4, 24(R15) + MOVD R5, 32(R15) + BL runtime·reflectcallmove(SB) RET CALLFN(·call32, 32) diff --git a/src/runtime/mbarrier.go b/src/runtime/mbarrier.go index ac00fc6a9e..90f730ee12 100644 --- a/src/runtime/mbarrier.go +++ b/src/runtime/mbarrier.go @@ -215,19 +215,21 @@ func reflect_typedmemmovepartial(typ *_type, dst, src unsafe.Pointer, off, size heapBitsBulkBarrier(uintptr(dst), size&^(sys.PtrSize-1)) } -// callwritebarrier is invoked at the end of reflectcall, to execute -// write barrier operations to record the fact that a call's return -// values have just been copied to frame, starting at retoffset -// and continuing to framesize. The entire frame (not just the return -// values) is described by typ. Because the copy has already -// happened, we call writebarrierptr_nostore, and this is nosplit so -// the copy and write barrier appear atomic to GC. +// reflectcallmove is invoked by reflectcall to copy the return values +// out of the stack and into the heap, invoking the necessary write +// barriers. dst, src, and size describe the return value area to +// copy. typ describes the entire frame (not just the return values). +// typ may be nil, which indicates write barriers are not needed. +// +// It must be nosplit and must only call nosplit functions because the +// stack map of reflectcall is wrong. +// //go:nosplit -func callwritebarrier(typ *_type, frame unsafe.Pointer, framesize, retoffset uintptr) { - if !writeBarrier.needed || typ == nil || typ.kind&kindNoPointers != 0 || framesize-retoffset < sys.PtrSize { - return +func reflectcallmove(typ *_type, dst, src unsafe.Pointer, size uintptr) { + memmove(dst, src, size) + if writeBarrier.needed && typ != nil && typ.kind&kindNoPointers == 0 && size >= sys.PtrSize { + heapBitsBulkBarrier(uintptr(dst), size) } - heapBitsBulkBarrier(uintptr(add(frame, retoffset)), framesize-retoffset) } //go:nosplit -- 2.48.1