Cypherpunks repositories - gostls13.git/commitdiff
encoding/base32: optimize Encode
author: korzhao <korzhao95@gmail.com>
Wed, 5 Jul 2023 12:23:35 +0000 (20:23 +0800)
committer: Gopher Robot <gobot@golang.org>
Mon, 7 Aug 2023 00:26:14 +0000 (00:26 +0000)
Convert each group of five 8-bit source bytes into two 32-bit integers.
This reduces the number of shift operations needed per 8-character output quantum.

benchmark                      old ns/op     new ns/op     delta
BenchmarkEncode-10             9005          4426          -50.85%
BenchmarkEncodeToString-10     10739         6155          -42.69%

benchmark                      old MB/s     new MB/s     speedup
BenchmarkEncode-10             909.69       1850.81      2.03x
BenchmarkEncodeToString-10     762.84       1331.02      1.74x

Change-Id: I9418d3436b73f94a4eb4b2b525e4f83612ff4d47
Reviewed-on: https://go-review.googlesource.com/c/go/+/514095
Reviewed-by: Michael Knyszek <mknyszek@google.com>
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>

src/encoding/base32/base32.go

index a4d515edbd7d4980b4e2cd1e247aa3ed6a323161..69ced9ca3c60452d9e9306a02cff9a071d007152 100644 (file)
@@ -109,77 +109,70 @@ func (enc Encoding) WithPadding(padding rune) *Encoding {
 // so Encode is not appropriate for use on individual blocks
 // of a large data stream. Use NewEncoder() instead.
 func (enc *Encoding) Encode(dst, src []byte) {
-       for len(src) > 0 {
-               var b [8]byte
-
-               // Unpack 8x 5-bit source blocks into a 5 byte
-               // destination quantum
-               switch len(src) {
-               default:
-                       b[7] = src[4] & 0x1F
-                       b[6] = src[4] >> 5
-                       fallthrough
-               case 4:
-                       b[6] |= (src[3] << 3) & 0x1F
-                       b[5] = (src[3] >> 2) & 0x1F
-                       b[4] = src[3] >> 7
-                       fallthrough
-               case 3:
-                       b[4] |= (src[2] << 1) & 0x1F
-                       b[3] = (src[2] >> 4) & 0x1F
-                       fallthrough
-               case 2:
-                       b[3] |= (src[1] << 4) & 0x1F
-                       b[2] = (src[1] >> 1) & 0x1F
-                       b[1] = (src[1] >> 6) & 0x1F
-                       fallthrough
-               case 1:
-                       b[1] |= (src[0] << 2) & 0x1F
-                       b[0] = src[0] >> 3
-               }
-
-               // Encode 5-bit blocks using the base32 alphabet
-               size := len(dst)
-               if size >= 8 {
-                       // Common case, unrolled for extra performance
-                       dst[0] = enc.encode[b[0]&31]
-                       dst[1] = enc.encode[b[1]&31]
-                       dst[2] = enc.encode[b[2]&31]
-                       dst[3] = enc.encode[b[3]&31]
-                       dst[4] = enc.encode[b[4]&31]
-                       dst[5] = enc.encode[b[5]&31]
-                       dst[6] = enc.encode[b[6]&31]
-                       dst[7] = enc.encode[b[7]&31]
-               } else {
-                       for i := 0; i < size; i++ {
-                               dst[i] = enc.encode[b[i]&31]
-                       }
-               }
+       if len(src) == 0 {
+               return
+       }
+       // enc is a pointer receiver, so the use of enc.encode within the hot
+       // loop below means a nil check at every operation. Lift that nil check
+       // outside of the loop to speed up the encoder.
+       _ = enc.encode
+
+       di, si := 0, 0
+       n := (len(src) / 5) * 5
+       for si < n {
+               // Combining two 32 bit loads allows the same code to be used
+               // for 32 and 64 bit platforms.
+               hi := uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])
+               lo := hi<<8 | uint32(src[si+4])
+
+               dst[di+0] = enc.encode[(hi>>27)&0x1F]
+               dst[di+1] = enc.encode[(hi>>22)&0x1F]
+               dst[di+2] = enc.encode[(hi>>17)&0x1F]
+               dst[di+3] = enc.encode[(hi>>12)&0x1F]
+               dst[di+4] = enc.encode[(hi>>7)&0x1F]
+               dst[di+5] = enc.encode[(hi>>2)&0x1F]
+               dst[di+6] = enc.encode[(lo>>5)&0x1F]
+               dst[di+7] = enc.encode[(lo)&0x1F]
+
+               si += 5
+               di += 8
+       }
 
-               // Pad the final quantum
-               if len(src) < 5 {
-                       if enc.padChar == NoPadding {
-                               break
-                       }
+       // Add the remaining small block
+       remain := len(src) - si
+       if remain == 0 {
+               return
+       }
 
-                       dst[7] = byte(enc.padChar)
-                       if len(src) < 4 {
-                               dst[6] = byte(enc.padChar)
-                               dst[5] = byte(enc.padChar)
-                               if len(src) < 3 {
-                                       dst[4] = byte(enc.padChar)
-                                       if len(src) < 2 {
-                                               dst[3] = byte(enc.padChar)
-                                               dst[2] = byte(enc.padChar)
-                                       }
-                               }
-                       }
+       // Encode the remaining bytes in reverse order.
+       val := uint32(0)
+       switch remain {
+       case 4:
+               val |= uint32(src[si+3])
+               dst[di+6] = enc.encode[val<<3&0x1F]
+               dst[di+5] = enc.encode[val>>2&0x1F]
+               fallthrough
+       case 3:
+               val |= uint32(src[si+2]) << 8
+               dst[di+4] = enc.encode[val>>7&0x1F]
+               fallthrough
+       case 2:
+               val |= uint32(src[si+1]) << 16
+               dst[di+3] = enc.encode[val>>12&0x1F]
+               dst[di+2] = enc.encode[val>>17&0x1F]
+               fallthrough
+       case 1:
+               val |= uint32(src[si+0]) << 24
+               dst[di+1] = enc.encode[val>>22&0x1F]
+               dst[di+0] = enc.encode[val>>27&0x1F]
+       }
 
-                       break
+       // Pad the final quantum
+       if enc.padChar != NoPadding {
+               nPad := (remain * 8 / 5) + 1
+               for i := nPad; i < 8; i++ {
+                       dst[di+i] = byte(enc.padChar)
                }
-
-               src = src[5:]
-               dst = dst[8:]
        }
 }