runtime: use multiply instead of divide in heapBitsForObject

author Russ Cox <rsc@golang.org>

Wed, 4 Mar 2015 16:34:50 +0000 (11:34 -0500)

committer Russ Cox <rsc@golang.org>

Wed, 4 Mar 2015 17:46:47 +0000 (17:46 +0000)
author Russ Cox <rsc@golang.org>
Wed, 4 Mar 2015 16:34:50 +0000 (11:34 -0500)
committer Russ Cox <rsc@golang.org>
Wed, 4 Mar 2015 17:46:47 +0000 (17:46 +0000)
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go

index 45920443639c9b776e448654905a2fbbd649fc82..6b46ad18cbfb1227cb302ae04c8e403138a73767 100644 (file)
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -202,7 +202,19 @@ func heapBitsForObject(p uintptr) (base uintptr, hbits heapBits) {
         }
         base = s.base()
         if p-base >= s.elemsize {
-               base += (p - base) / s.elemsize * s.elemsize
+               // n := (p - base) / s.elemsize, using division by multiplication
+               n := uintptr(uint64(p-base) >> s.divShift * uint64(s.divMul) >> s.divShift2)
+
+               const debugMagic = false
+               if debugMagic {
+                       n2 := (p - base) / s.elemsize
+                       if n != n2 {
+                               println("runtime: bad div magic", (p - base), s.elemsize, s.divShift, s.divMul, s.divShift2)
+                               throw("bad div magic")
+                       }
+               }
+
+               base += n * s.elemsize
         }
         if base == p {
                 print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), "\n")
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go

index 94ef4de56aae74eb7ed1adabb82a8afe54750518..fc4dfeea975c6016eba34fa31d5776fa7665d0da 100644 (file)
--- a/src/runtime/mheap.go
+++ b/src/runtime/mheap.go
@@ -101,11 +101,14 @@ type mspan struct {
         // if sweepgen == h->sweepgen, the span is swept and ready to use
         // h->sweepgen is incremented by 2 after every GC
         sweepgen    uint32
+       divMul      uint32   // for divide by elemsize - divMagic.mul
         ref         uint16   // capacity - number of objects in freelist
         sizeclass   uint8    // size class
         incache     bool     // being used by an mcache
         state       uint8    // mspaninuse etc
         needzero    uint8    // needs to be zeroed before allocation
+       divShift    uint8    // for divide by elemsize - divMagic.shift
+       divShift2   uint8    // for divide by elemsize - divMagic.shift2
         elemsize    uintptr  // computed from sizeclass or from npages
         unusedsince int64    // first time spotted by gc in mspanfree state
         npreleased  uintptr  // number of pages released to the os
@@ -385,8 +388,15 @@ func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan
                 s.sizeclass = uint8(sizeclass)
                 if sizeclass == 0 {
                         s.elemsize = s.npages << _PageShift
+                       s.divShift = 0
+                       s.divMul = 0
+                       s.divShift2 = 0
                 } else {
                         s.elemsize = uintptr(class_to_size[sizeclass])
+                       m := &class_to_divmagic[sizeclass]
+                       s.divShift = m.shift
+                       s.divMul = m.mul
+                       s.divShift2 = m.shift2
                 }
  
                 // update stats, sweep lists
diff --git a/src/runtime/msize.go b/src/runtime/msize.go

index 370cae629e237b29031dde73c9927685bf643a59..f2a7cb9dddf0ce9d84b95b3edd686ab1975a2632 100644 (file)
--- a/src/runtime/msize.go
+++ b/src/runtime/msize.go
@@ -48,6 +48,8 @@ package runtime
  
  var class_to_size [_NumSizeClasses]int32
  var class_to_allocnpages [_NumSizeClasses]int32
+var class_to_divmagic [_NumSizeClasses]divMagic
+
  var size_to_class8 [1024/8 + 1]int8
  var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
  
@@ -144,6 +146,11 @@ func initSizes() {
         for i := 0; i < len(class_to_size); i++ {
                 memstats.by_size[i].size = uint32(class_to_size[i])
         }
+
+       for i := 1; i < len(class_to_size); i++ {
+               class_to_divmagic[i] = computeDivMagic(uint32(class_to_size[i]))
+       }
+
         return
  
  dump:
@@ -182,3 +189,55 @@ func roundupsize(size uintptr) uintptr {
         }
         return round(size, _PageSize)
  }
+
+// divMagic holds magic constants to implement division
+// by a particular constant as a shift, multiply, and shift.
+// That is, given
+//     m = computeDivMagic(d)
+// then
+//     n/d == ((n>>m.shift) * m.mul) >> m.shift2
+//
+// The magic computation picks m such that
+//     d = d₁*d₂
+//     d₂ = 2^m.shift
+//     m.mul = ⌈2^m.shift2 / d₁⌉
+//
+// The magic computation here is tailored for malloc block sizes
+// and does not handle arbitrary d correctly. Malloc block sizes d are
+// always even, so the first shift implements the factors of 2 in d
+// and then the mul and second shift implement the odd factor
+// that remains. Because the first shift divides n by at least 2 (actually 8)
+// before the multiply gets involved, the huge corner cases that
+// require additional adjustment are impossible, so the usual
+// fixup is not needed.
+//
+// For more details see Hacker's Delight, Chapter 10, and
+// http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
+// http://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
+type divMagic struct {
+       shift  uint8
+       mul    uint32
+       shift2 uint8
+}
+
+func computeDivMagic(d uint32) divMagic {
+       var m divMagic
+
+       // Compute pre-shift by factoring power of 2 out of d.
+       for d&1 == 0 {
+               m.shift++
+               d >>= 1
+       }
+
+       // Compute largest k such that ⌈2^k / d⌉ fits in a 32-bit int.
+       // This is always a good enough approximation.
+       // We could use smaller k for some divisors but there's no point.
+       k := uint8(63)
+       d64 := uint64(d)
+       for ((1<<k)+d64-1)/d64 >= 1<<32 {
+               k--
+       }
+       m.mul = uint32(((1 << k) + d64 - 1) / d64) //  ⌈2^k / d⌉
+       m.shift2 = k
+       return m
+}
author	Russ Cox <rsc@golang.org>
	Wed, 4 Mar 2015 16:34:50 +0000 (11:34 -0500)
committer	Russ Cox <rsc@golang.org>
	Wed, 4 Mar 2015 17:46:47 +0000 (17:46 +0000)
src/runtime/mbitmap.go		patch \| blob \| history
src/runtime/mheap.go		patch \| blob \| history
src/runtime/msize.go		patch \| blob \| history