From 397d2117ec5ca7a96324f02dcfc24d8f00b067a0 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 14 Jul 2025 19:13:27 +0000 Subject: [PATCH] runtime: merge inline mark bits with gcmarkBits 8 bytes at a time Currently, with Green Tea GC, we need to copy (really bitwise-or) mark bits back into mspan.gcmarkBits, so that they can propagate to mspan.allocBits at sweep time. This function does actually seem to make sweeping small spans a good bit more expensive, though sweeping is still relatively cheap. There's some low-hanging fruit here though, in that the merge is performed one byte at a time, which is pretty inefficient. We can almost as easily perform this merge one word at a time instead, which seems to make this operation about 33% faster. For #73581. Change-Id: I170d36e7a2193199c423dcd556cba048ebd698af Reviewed-on: https://go-review.googlesource.com/c/go/+/687935 Reviewed-by: Michael Pratt LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui Auto-Submit: Michael Knyszek --- src/runtime/mgcmark_greenteagc.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/runtime/mgcmark_greenteagc.go b/src/runtime/mgcmark_greenteagc.go index ac2b1732f9..3a368438d4 100644 --- a/src/runtime/mgcmark_greenteagc.go +++ b/src/runtime/mgcmark_greenteagc.go @@ -192,9 +192,13 @@ func (s *mspan) mergeInlineMarks(dst *gcBits) { } bytes := divRoundUp(uintptr(s.nelems), 8) imb := s.inlineMarkBits() - _ = imb.marks[bytes-1] - for i := uintptr(0); i < bytes; i++ { - *dst.bytep(i) |= imb.marks[i] + imbMarks := (*gc.ObjMask)(unsafe.Pointer(&imb.marks)) + for i := uintptr(0); i < bytes; i += goarch.PtrSize { + marks := bswapIfBigEndian(imbMarks[i/goarch.PtrSize]) + if i/goarch.PtrSize == uintptr(len(imb.marks)+1)/goarch.PtrSize-1 { + marks &^= 0xff << ((goarch.PtrSize - 1) * 8) // mask out class + } + *(*uintptr)(unsafe.Pointer(dst.bytep(i))) |= bswapIfBigEndian(marks) } if doubleCheckGreenTea && !s.spanclass.noscan() && imb.marks != imb.scans { 
throw("marks don't match scans for span with pointer") @@ -652,7 +656,7 @@ func spanSetScans(spanBase uintptr, nelems uint16, imb *spanInlineMarkBits, toSc marks := imbMarks[i/goarch.PtrSize] scans = bswapIfBigEndian(scans) marks = bswapIfBigEndian(marks) - if i/goarch.PtrSize == 64/goarch.PtrSize-1 { + if i/goarch.PtrSize == uintptr(len(imb.marks)+1)/goarch.PtrSize-1 { scans &^= 0xff << ((goarch.PtrSize - 1) * 8) // mask out owned marks &^= 0xff << ((goarch.PtrSize - 1) * 8) // mask out class } -- 2.50.0