From b30fa1bcc411f3a65a6e8f40ff3acdb1526ce0d0 Mon Sep 17 00:00:00 2001
From: Keith Randall
Date: Thu, 24 Apr 2025 11:10:05 -0700
Subject: [PATCH] runtime: improve scan inner loop

On every arch except amd64, it is faster to do x&(x-1) than x^(1<<i).

Reviewed-by: Keith Randall
LUCI-TryBot-Result: Go LUCI
---
 src/runtime/mbitmap.go | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 7d528b94b4..f9a4c4ce3d 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -219,8 +219,13 @@ func (tp typePointers) nextFast() (typePointers, uintptr) {
 	} else {
 		i = sys.TrailingZeros32(uint32(tp.mask))
 	}
-	// BTCQ
-	tp.mask ^= uintptr(1) << (i & (ptrBits - 1))
+	if GOARCH == "amd64" {
+		// BTCQ
+		tp.mask ^= uintptr(1) << (i & (ptrBits - 1))
+	} else {
+		// SUB, AND
+		tp.mask &= tp.mask - 1
+	}
 	// LEAQ (XX)(XX*8)
 	return tp, tp.addr + uintptr(i)*goarch.PtrSize
 }
-- 
2.51.0