"(*mspan).markBitsForIndex",
"(*muintptr).set",
"(*puintptr).set",
+ "(*wbBuf).get1",
+ "(*wbBuf).get2",
},
"runtime/internal/sys": {},
"runtime/internal/math": {
// faster than having the caller spill these.
MOVL CX, 20(SP)
MOVL BX, 24(SP)
+retry:
// TODO: Consider passing g.m.p in as an argument so they can be shared
// across a sequence of write barriers.
get_tls(BX)
MOVL (p_wbBuf+wbBuf_next)(BX), CX
// Increment wbBuf.next position.
LEAL 8(CX), CX
- MOVL CX, (p_wbBuf+wbBuf_next)(BX)
+ // Is the buffer full?
CMPL CX, (p_wbBuf+wbBuf_end)(BX)
+ JA flush
+ // Commit to the larger buffer.
+ MOVL CX, (p_wbBuf+wbBuf_next)(BX)
// Record the write.
MOVL AX, -8(CX) // Record value
MOVL (DI), BX // TODO: This turns bad writes into bad reads.
MOVL BX, -4(CX) // Record *slot
- // Is the buffer full? (flags set in CMPL above)
- JEQ flush
-ret:
MOVL 20(SP), CX
MOVL 24(SP), BX
// Do the write.
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
- MOVL DI, 0(SP) // Also first argument to wbBufFlush
- MOVL AX, 4(SP) // Also second argument to wbBufFlush
+ MOVL DI, 0(SP)
+ MOVL AX, 4(SP)
// BX already saved
// CX already saved
MOVL DX, 8(SP)
MOVL SI, 16(SP)
// DI already saved
- // This takes arguments DI and AX
CALL runtime·wbBufFlush(SB)
MOVL 0(SP), DI
MOVL 8(SP), DX
MOVL 12(SP), BP
MOVL 16(SP), SI
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// faster than having the caller spill these.
MOVQ R12, 96(SP)
MOVQ R13, 104(SP)
+retry:
// TODO: Consider passing g.m.p in as an argument so they can be shared
// across a sequence of write barriers.
MOVQ g_m(R14), R13
MOVQ m_p(R13), R13
+ // Get current buffer write position.
MOVQ (p_wbBuf+wbBuf_next)(R13), R12
// Increment wbBuf.next position.
LEAQ 16(R12), R12
- MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
+ // Is the buffer full?
CMPQ R12, (p_wbBuf+wbBuf_end)(R13)
+ JA flush
+ // Commit to the larger buffer.
+ MOVQ R12, (p_wbBuf+wbBuf_next)(R13)
// Record the write.
MOVQ AX, -16(R12) // Record value
// Note: This turns bad pointer writes into bad
// combine the read and the write.
MOVQ (DI), R13
MOVQ R13, -8(R12) // Record *slot
- // Is the buffer full? (flags set in CMPQ above)
- JEQ flush
-ret:
MOVQ 96(SP), R12
MOVQ 104(SP), R13
// Do the write.
//
// TODO: We could strike a different balance; e.g., saving X0
// and not saving GP registers that are less likely to be used.
- MOVQ DI, 0(SP) // Also first argument to wbBufFlush
- MOVQ AX, 8(SP) // Also second argument to wbBufFlush
+ MOVQ DI, 0(SP)
+ MOVQ AX, 8(SP)
MOVQ BX, 16(SP)
MOVQ CX, 24(SP)
MOVQ DX, 32(SP)
// R14 is g
MOVQ R15, 88(SP)
- // This takes arguments DI and AX
CALL runtime·wbBufFlush(SB)
MOVQ 0(SP), DI
MOVQ 72(SP), R10
MOVQ 80(SP), R11
MOVQ 88(SP), R15
- JMP ret
+ JMP retry
// gcWriteBarrierCX is gcWriteBarrier, but with args in DI and CX.
// Defined as ABIInternal since it does not use the stable Go ABI.
TEXT runtime·gcWriteBarrier(SB),NOSPLIT|NOFRAME,$0
// Save the registers clobbered by the fast path.
MOVM.DB.W [R0,R1], (R13)
+retry:
MOVW g_m(g), R0
MOVW m_p(R0), R0
MOVW (p_wbBuf+wbBuf_next)(R0), R1
+ MOVW (p_wbBuf+wbBuf_end)(R0), R11
// Increment wbBuf.next position.
ADD $8, R1
+ // Is the buffer full?
+ CMP R11, R1
+ BHI flush
+ // Commit to the larger buffer.
MOVW R1, (p_wbBuf+wbBuf_next)(R0)
- MOVW (p_wbBuf+wbBuf_end)(R0), R0
- CMP R1, R0
// Record the write.
MOVW R3, -8(R1) // Record value
MOVW (R2), R0 // TODO: This turns bad writes into bad reads.
MOVW R0, -4(R1) // Record *slot
- // Is the buffer full? (flags set in CMP above)
- B.EQ flush
-ret:
MOVM.IA.W (R13), [R0,R1]
// Do the write.
MOVW R3, (R2)
// R11 is linker temp, so no need to save.
// R13 is stack pointer.
// R15 is PC.
- //
- // This also sets up R2 and R3 as the arguments to wbBufFlush.
MOVM.DB.W [R2-R9,R12], (R13)
// Save R14 (LR) because the fast path above doesn't save it,
- // but needs it to RET. This is after the MOVM so it appears below
- // the arguments in the stack frame.
+ // but needs it to RET.
MOVM.DB.W [R14], (R13)
- // This takes arguments R2 and R3.
CALL runtime·wbBufFlush(SB)
MOVM.IA.W (R13), [R14]
MOVM.IA.W (R13), [R2-R9,R12]
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// - R2 is the destination of the write
// - R3 is the value being written at R2
// It clobbers condition codes.
-// It does not clobber any general-purpose registers,
+// It does not clobber any general-purpose registers except R27,
// but may clobber others (e.g., floating point registers)
// The act of CALLing gcWriteBarrier will clobber R30 (LR).
//
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$200
// Save the registers clobbered by the fast path.
STP (R0, R1), 184(RSP)
+retry:
MOVD g_m(g), R0
MOVD m_p(R0), R0
- MOVD (p_wbBuf+wbBuf_next)(R0), R1
+ MOVD (p_wbBuf+wbBuf_next)(R0), R1
+ MOVD (p_wbBuf+wbBuf_end)(R0), R27
// Increment wbBuf.next position.
ADD $16, R1
+ // Is the buffer full?
+ CMP R27, R1
+ BHI flush
+ // Commit to the larger buffer.
MOVD R1, (p_wbBuf+wbBuf_next)(R0)
- MOVD (p_wbBuf+wbBuf_end)(R0), R0
- CMP R1, R0
// Record the write.
MOVD R3, -16(R1) // Record value
MOVD (R2), R0 // TODO: This turns bad writes into bad reads.
MOVD R0, -8(R1) // Record *slot
- // Is the buffer full? (flags set in CMP above)
- BEQ flush
-ret:
LDP 184(RSP), (R0, R1)
// Do the write.
MOVD R3, (R2)
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
// R0 and R1 already saved
- STP (R2, R3), 1*8(RSP) // Also first and second arguments to wbBufFlush
+ STP (R2, R3), 1*8(RSP)
STP (R4, R5), 3*8(RSP)
STP (R6, R7), 5*8(RSP)
STP (R8, R9), 7*8(RSP)
// R30 is LR, which was saved by the prologue.
// R31 is SP.
- // This takes arguments R2 and R3.
CALL runtime·wbBufFlush(SB)
LDP 1*8(RSP), (R2, R3)
LDP 3*8(RSP), (R4, R5)
LDP 17*8(RSP), (R21, R22)
LDP 19*8(RSP), (R23, R24)
LDP 21*8(RSP), (R25, R26)
- JMP ret
+ JMP retry
DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
// Save the registers clobbered by the fast path.
MOVV R19, 208(R3)
MOVV R13, 216(R3)
+retry:
MOVV g_m(g), R19
MOVV m_p(R19), R19
MOVV (p_wbBuf+wbBuf_next)(R19), R13
+ MOVV (p_wbBuf+wbBuf_end)(R19), R30 // R30 is linker temp register
// Increment wbBuf.next position.
ADDV $16, R13
+ // Is the buffer full?
+ BLTU R30, R13, flush
+ // Commit to the larger buffer.
MOVV R13, (p_wbBuf+wbBuf_next)(R19)
- MOVV (p_wbBuf+wbBuf_end)(R19), R19
- MOVV R19, R30 // R30 is linker temp register
// Record the write.
MOVV R28, -16(R13) // Record value
MOVV (R27), R19 // TODO: This turns bad writes into bad reads.
MOVV R19, -8(R13) // Record *slot
- // Is the buffer full?
- BEQ R13, R30, flush
-ret:
MOVV 208(R3), R19
MOVV 216(R3), R13
// Do the write.
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
- MOVV R27, 8(R3) // Also first argument to wbBufFlush
- MOVV R28, 16(R3) // Also second argument to wbBufFlush
+ MOVV R27, 8(R3)
+ MOVV R28, 16(R3)
// R1 is LR, which was saved by the prologue.
MOVV R2, 24(R3)
// R3 is SP.
// R30 is tmp register.
MOVV R31, 200(R3)
-
- // This takes arguments R27 and R28.
CALL runtime·wbBufFlush(SB)
MOVV 8(R3), R27
MOVV 184(R3), R26
MOVV 192(R3), R29
MOVV 200(R3), R31
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// Save the registers clobbered by the fast path.
MOVV R1, 184(R29)
MOVV R2, 192(R29)
+retry:
MOVV g_m(g), R1
MOVV m_p(R1), R1
MOVV (p_wbBuf+wbBuf_next)(R1), R2
+ MOVV (p_wbBuf+wbBuf_end)(R1), R23 // R23 is linker temp register
// Increment wbBuf.next position.
ADDV $16, R2
+ // Is the buffer full?
+ SGTU R2, R23, R23
+ BNE R23, flush
+ // Commit to the larger buffer.
MOVV R2, (p_wbBuf+wbBuf_next)(R1)
- MOVV (p_wbBuf+wbBuf_end)(R1), R1
- MOVV R1, R23 // R23 is linker temp register
// Record the write.
MOVV R21, -16(R2) // Record value
MOVV (R20), R1 // TODO: This turns bad writes into bad reads.
MOVV R1, -8(R2) // Record *slot
- // Is the buffer full?
- BEQ R2, R23, flush
-ret:
MOVV 184(R29), R1
MOVV 192(R29), R2
// Do the write.
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
- MOVV R20, 8(R29) // Also first argument to wbBufFlush
- MOVV R21, 16(R29) // Also second argument to wbBufFlush
+ MOVV R20, 8(R29)
+ MOVV R21, 16(R29)
// R1 already saved
// R2 already saved
MOVV R3, 24(R29)
// R30 is g.
// R31 is LR, which was saved by the prologue.
- // This takes arguments R20 and R21.
CALL runtime·wbBufFlush(SB)
MOVV 8(R29), R20
MOVV 160(R29), R22
MOVV 168(R29), R24
MOVV 176(R29), R25
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// Save the registers clobbered by the fast path.
MOVW R1, 100(R29)
MOVW R2, 104(R29)
+retry:
MOVW g_m(g), R1
MOVW m_p(R1), R1
MOVW (p_wbBuf+wbBuf_next)(R1), R2
+ MOVW (p_wbBuf+wbBuf_end)(R1), R23 // R23 is linker temp register
// Increment wbBuf.next position.
ADD $8, R2
+ // Is the buffer full?
+ SGTU R2, R23, R23
+ BNE R23, flush
+ // Commit to the larger buffer.
MOVW R2, (p_wbBuf+wbBuf_next)(R1)
- MOVW (p_wbBuf+wbBuf_end)(R1), R1
- MOVW R1, R23 // R23 is linker temp register
// Record the write.
MOVW R21, -8(R2) // Record value
MOVW (R20), R1 // TODO: This turns bad writes into bad reads.
MOVW R1, -4(R2) // Record *slot
- // Is the buffer full?
- BEQ R2, R23, flush
-ret:
MOVW 100(R29), R1
MOVW 104(R29), R2
// Do the write.
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
- MOVW R20, 4(R29) // Also first argument to wbBufFlush
- MOVW R21, 8(R29) // Also second argument to wbBufFlush
+ MOVW R20, 4(R29)
+ MOVW R21, 8(R29)
// R1 already saved
// R2 already saved
MOVW R3, 12(R29)
// R30 is g.
// R31 is LR, which was saved by the prologue.
- // This takes arguments R20 and R21.
CALL runtime·wbBufFlush(SB)
MOVW 4(R29), R20
MOVW 88(R29), R24
MOVW 92(R29), R25
MOVW 96(R29), R28
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// but may clobber any other register, *including* R31.
TEXT runtime·gcWriteBarrier<ABIInternal>(SB),NOSPLIT,$112
// The standard prologue clobbers R31.
- // We use R18 and R19 as scratch registers.
+ // We use R18, R19, and R31 as scratch registers.
+retry:
MOVD g_m(g), R18
MOVD m_p(R18), R18
MOVD (p_wbBuf+wbBuf_next)(R18), R19
+ MOVD (p_wbBuf+wbBuf_end)(R18), R31
// Increment wbBuf.next position.
ADD $16, R19
+ // Is the buffer full?
+ CMPU R31, R19
+ BLT flush
+ // Commit to the larger buffer.
MOVD R19, (p_wbBuf+wbBuf_next)(R18)
- MOVD (p_wbBuf+wbBuf_end)(R18), R18
- CMP R18, R19
// Record the write.
MOVD R21, -16(R19) // Record value
MOVD (R20), R18 // TODO: This turns bad writes into bad reads.
MOVD R18, -8(R19) // Record *slot
- // Is the buffer full? (flags set in CMP above)
- BEQ flush
-ret:
// Do the write.
MOVD R21, (R20)
RET
flush:
// Save registers R0 through R15 since these were not saved by the caller.
// We don't save all registers on ppc64 because it takes too much space.
- MOVD R20, (FIXED_FRAME+0)(R1) // Also first argument to wbBufFlush
- MOVD R21, (FIXED_FRAME+8)(R1) // Also second argument to wbBufFlush
+ MOVD R20, (FIXED_FRAME+0)(R1)
+ MOVD R21, (FIXED_FRAME+8)(R1)
// R0 is always 0, so no need to spill.
// R1 is SP.
// R2 is SB.
MOVD R16, (FIXED_FRAME+96)(R1)
MOVD R17, (FIXED_FRAME+104)(R1)
- // This takes arguments R20 and R21.
CALL runtime·wbBufFlush(SB)
MOVD (FIXED_FRAME+0)(R1), R20
MOVD (FIXED_FRAME+88)(R1), R15
MOVD (FIXED_FRAME+96)(R1), R16
MOVD (FIXED_FRAME+104)(R1), R17
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// gcWriteBarrier performs a heap pointer write and informs the GC.
//
-// gcWriteBarrier does NOT follow the Go ABI. It takes two arguments:
-// - T0 is the destination of the write
-// - T1 is the value being written at T0.
-// It clobbers R30 (the linker temp register - REG_TMP).
+// gcWriteBarrier does NOT follow the Go ABI. It accepts the
+// number of bytes of buffer needed in X24, and returns a pointer
+// to the buffer space in X24.
+// It clobbers X31 aka T6 (the linker temp register - REG_TMP).
// The act of CALLing gcWriteBarrier will clobber RA (LR).
// It does not clobber any other general-purpose registers,
// but may clobber others (e.g., floating point registers).
// Save the registers clobbered by the fast path.
MOV A0, 24*8(X2)
MOV A1, 25*8(X2)
+retry:
MOV g_m(g), A0
MOV m_p(A0), A0
MOV (p_wbBuf+wbBuf_next)(A0), A1
+ MOV (p_wbBuf+wbBuf_end)(A0), T6 // T6 is linker temp register (REG_TMP)
// Increment wbBuf.next position.
ADD $16, A1
+ // Is the buffer full?
+ BLTU T6, A1, flush
+ // Commit to the larger buffer.
MOV A1, (p_wbBuf+wbBuf_next)(A0)
- MOV (p_wbBuf+wbBuf_end)(A0), A0
- MOV A0, T6 // T6 is linker temp register (REG_TMP)
// Record the write.
MOV T1, -16(A1) // Record value
MOV (T0), A0 // TODO: This turns bad writes into bad reads.
MOV A0, -8(A1) // Record *slot
- // Is the buffer full?
- BEQ A1, T6, flush
-ret:
MOV 24*8(X2), A0
MOV 25*8(X2), A1
// Do the write.
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
- MOV T0, 1*8(X2) // Also first argument to wbBufFlush
- MOV T1, 2*8(X2) // Also second argument to wbBufFlush
+ MOV T0, 1*8(X2)
+ MOV T1, 2*8(X2)
// X0 is zero register
// X1 is LR, saved by prologue
// X2 is SP
// X3 is GP
// X4 is TP
- // X5 is first arg to wbBufFlush (T0)
- // X6 is second arg to wbBufFlush (T1)
MOV X7, 3*8(X2)
MOV X8, 4*8(X2)
MOV X9, 5*8(X2)
MOV X30, 23*8(X2)
// X31 is tmp register.
- // This takes arguments T0 and T1.
CALL runtime·wbBufFlush(SB)
MOV 1*8(X2), T0
MOV 22*8(X2), X29
MOV 23*8(X2), X30
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers (ssa/gen/RISCV64Ops.go), but the space for those
TEXT runtime·gcWriteBarrier(SB),NOSPLIT,$96
// Save the registers clobbered by the fast path.
MOVD R4, 96(R15)
+retry:
MOVD g_m(g), R1
MOVD m_p(R1), R1
// Increment wbBuf.next position.
MOVD $16, R4
ADD (p_wbBuf+wbBuf_next)(R1), R4
+ // Is the buffer full?
+ MOVD (p_wbBuf+wbBuf_end)(R1), R10
+ CMPUBGT R4, R10, flush
+ // Commit to the larger buffer.
MOVD R4, (p_wbBuf+wbBuf_next)(R1)
- MOVD (p_wbBuf+wbBuf_end)(R1), R1
// Record the write.
MOVD R3, -16(R4) // Record value
MOVD (R2), R10 // TODO: This turns bad writes into bad reads.
MOVD R10, -8(R4) // Record *slot
- // Is the buffer full?
- CMPBEQ R4, R1, flush
-ret:
MOVD 96(R15), R4
// Do the write.
MOVD R3, (R2)
flush:
// Save all general purpose registers since these could be
// clobbered by wbBufFlush and were not saved by the caller.
- STMG R2, R3, 8(R15) // set R2 and R3 as arguments for wbBufFlush
+ STMG R2, R3, 8(R15)
MOVD R0, 24(R15)
// R1 already saved.
// R4 already saved.
// R14 is LR.
// R15 is SP.
- // This takes arguments R2 and R3.
CALL runtime·wbBufFlush(SB)
LMG 8(R15), R2, R3 // restore R2 - R3
MOVD 24(R15), R0 // restore R0
LMG 32(R15), R5, R12 // restore R5 - R12
- JMP ret
+ JMP retry
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// R0: the destination of the write (i64)
// R1: the value being written (i64)
TEXT runtime·gcWriteBarrier(SB), NOSPLIT, $16
- // R3 = g.m
- MOVD g_m(g), R3
- // R4 = p
- MOVD m_p(R3), R4
- // R5 = wbBuf.next
- MOVD p_wbBuf+wbBuf_next(R4), R5
-
- // Record value
- MOVD R1, 0(R5)
- // Record *slot
- MOVD (R0), 8(R5)
-
- // Increment wbBuf.next
- Get R5
- I64Const $16
- I64Add
- Set R5
- MOVD R5, p_wbBuf+wbBuf_next(R4)
-
- Get R5
- I64Load (p_wbBuf+wbBuf_end)(R4)
- I64Eq
- If
+ Loop
+ // R3 = g.m
+ MOVD g_m(g), R3
+ // R4 = p
+ MOVD m_p(R3), R4
+ // R5 = wbBuf.next
+ MOVD p_wbBuf+wbBuf_next(R4), R5
+
+ // Increment wbBuf.next
+ Get R5
+ I64Const $16
+ I64Add
+ Set R5
+
+ // Is the buffer full?
+ Get R5
+ I64Load (p_wbBuf+wbBuf_end)(R4)
+ I64LeU
+ If
+ // Commit to the larger buffer.
+ MOVD R5, p_wbBuf+wbBuf_next(R4)
+
+ // Back up to write position (wasm stores can't use negative offsets)
+ Get R5
+ I64Const $16
+ I64Sub
+ Set R5
+
+ // Record value
+ MOVD R1, 0(R5)
+ // Record *slot
+ MOVD (R0), 8(R5)
+
+ // Do the write
+ MOVD R1, (R0)
+
+ RET
+ End
+
// Flush
MOVD R0, 0(SP)
MOVD R1, 8(SP)
CALLNORESUME runtime·wbBufFlush(SB)
- End
+ MOVD 0(SP), R0
+ MOVD 8(SP), R1
- // Do the write
- MOVD R1, (R0)
-
- RET
+ // Retry
+ Br $0
+ End
//go:nosplit
func atomicwb(ptr *unsafe.Pointer, new unsafe.Pointer) {
slot := (*uintptr)(unsafe.Pointer(ptr))
- if !getg().m.p.ptr().wbBuf.putFast(*slot, uintptr(new)) {
- wbBufFlush()
- }
+ buf := getg().m.p.ptr().wbBuf.get2()
+ buf[0] = *slot
+ buf[1] = uintptr(new)
}
// atomicstorep performs *ptr = new atomically and invokes a write barrier.
break
}
dstx := (*uintptr)(unsafe.Pointer(addr))
- if !buf.putFast(*dstx, 0) {
- wbBufFlush()
- }
+ p := buf.get1()
+ p[0] = *dstx
}
} else {
for {
}
dstx := (*uintptr)(unsafe.Pointer(addr))
srcx := (*uintptr)(unsafe.Pointer(src + (addr - dst)))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush()
- }
+ p := buf.get2()
+ p[0] = *dstx
+ p[1] = *srcx
}
}
}
break
}
srcx := (*uintptr)(unsafe.Pointer(addr - dst + src))
- if !buf.putFast(0, *srcx) {
- wbBufFlush()
- }
+ p := buf.get1()
+ p[0] = *srcx
}
}
if *bits&mask != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
if src == 0 {
- if !buf.putFast(*dstx, 0) {
- wbBufFlush()
- }
+ p := buf.get1()
+ p[0] = *dstx
} else {
srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush()
- }
+ p := buf.get2()
+ p[0] = *dstx
+ p[1] = *srcx
}
}
mask <<= 1
if bits&1 != 0 {
dstx := (*uintptr)(unsafe.Pointer(dst + i))
srcx := (*uintptr)(unsafe.Pointer(src + i))
- if !buf.putFast(*dstx, *srcx) {
- wbBufFlush()
- }
+ p := buf.get2()
+ p[0] = *dstx
+ p[1] = *srcx
}
}
}
// be updated without write barriers.
end uintptr
- // buf stores a series of pointers to execute write barriers
- // on. This must be a multiple of wbBufEntryPointers because
- // the write barrier only checks for overflow once per entry.
- buf [wbBufEntryPointers * wbBufEntries]uintptr
+ // buf stores a series of pointers to execute write barriers on.
+ buf [wbBufEntries]uintptr
}
const (
- // wbBufEntries is the number of write barriers between
- // flushes of the write barrier buffer.
+ // wbBufEntries is the maximum number of pointers that can be
+ // stored in the write barrier buffer.
//
// This trades latency for throughput amortization. Higher
// values amortize flushing overhead more, but increase the
// footprint of the buffer.
//
// TODO: What is the latency cost of this? Tune this value.
- wbBufEntries = 256
+ wbBufEntries = 512
- // wbBufEntryPointers is the number of pointers added to the
- // buffer by each write barrier.
- wbBufEntryPointers = 2
+ // Maximum number of entries that we need to ask from the
+ // buffer in a single call.
+ wbMaxEntriesPerCall = 2
)
// reset empties b by resetting its next and end pointers.
start := uintptr(unsafe.Pointer(&b.buf[0]))
b.next = start
if testSmallBuf {
- // For testing, allow two barriers in the buffer. If
- // we only did one, then barriers of non-heap pointers
- // would be no-ops. This lets us combine a buffered
- // barrier with a flush at a later time.
- b.end = uintptr(unsafe.Pointer(&b.buf[2*wbBufEntryPointers]))
+ // For testing, make the buffer smaller but more than
+ // 1 write barrier's worth, so it tests both the
+ // immediate flush and delayed flush cases.
+ b.end = uintptr(unsafe.Pointer(&b.buf[wbMaxEntriesPerCall+1]))
} else {
b.end = start + uintptr(len(b.buf))*unsafe.Sizeof(b.buf[0])
}
- if (b.end-b.next)%(wbBufEntryPointers*unsafe.Sizeof(b.buf[0])) != 0 {
+ if (b.end-b.next)%unsafe.Sizeof(b.buf[0]) != 0 {
throw("bad write barrier buffer bounds")
}
}
return b.next == uintptr(unsafe.Pointer(&b.buf[0]))
}
-// putFast adds old and new to the write barrier buffer and returns
-// false if a flush is necessary. Callers should use this as:
+// getX returns space in the write barrier buffer to store X pointers.
+// getX will flush the buffer if necessary. Callers should use this as:
//
// buf := &getg().m.p.ptr().wbBuf
-// if !buf.putFast(old, new) {
-// wbBufFlush()
-// }
+// p := buf.get2()
+// p[0], p[1] = old, new
// ... actual memory write ...
//
// The caller must ensure there are no preemption points during the
// could allow a GC phase change, which could result in missed write
// barriers.
//
-// putFast must be nowritebarrierrec to because write barriers here would
+// getX must be nowritebarrierrec because write barriers here would
// corrupt the write barrier buffer. It (and everything it calls, if
// it called anything) has to be nosplit to avoid scheduling on to a
// different P and a different buffer.
//
//go:nowritebarrierrec
//go:nosplit
-func (b *wbBuf) putFast(old, new uintptr) bool {
+func (b *wbBuf) get1() *[1]uintptr {
+ if b.next+goarch.PtrSize > b.end {
+ wbBufFlush()
+ }
+ p := (*[1]uintptr)(unsafe.Pointer(b.next))
+ b.next += goarch.PtrSize
+ return p
+}
+
+//go:nowritebarrierrec
+//go:nosplit
+func (b *wbBuf) get2() *[2]uintptr {
+ if b.next+2*goarch.PtrSize > b.end {
+ wbBufFlush()
+ }
p := (*[2]uintptr)(unsafe.Pointer(b.next))
- p[0] = old
- p[1] = new
b.next += 2 * goarch.PtrSize
- return b.next != b.end
+ return p
}
// wbBufFlush flushes the current P's write barrier buffer to the GC
// Note: Every possible return from this function must reset
// the buffer's next pointer to prevent buffer overflow.
- // This *must not* modify its arguments because this
- // function's argument slots do double duty in gcWriteBarrier
- // as register spill slots. Currently, not modifying the
- // arguments is sufficient to keep the spill slots unmodified
- // (which seems unlikely to change since it costs little and
- // helps with debugging).
-
if getg().m.dying > 0 {
// We're going down. Not much point in write barriers
// and this way we can allow write barriers in the
}
// Switch to the system stack so we don't have to worry about
- // the untyped stack slots or safe points.
+ // safe points.
systemstack(func() {
wbBufFlush1(getg().m.p.ptr())
})