From b71ed4edc6a5663caac434f9c2bea47dbc37db15 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Martin=20M=C3=B6hrmann?= <moehrmann@google.com>
Date: Sat, 11 Mar 2017 08:48:56 +0100
Subject: [PATCH] strconv: fix performance regression in integer formatting on
 32bit platforms
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Some of the changes in CL golang.org/cl/38071/ assumed that / and %
could always be combined to use only one DIV instruction. However,
this is not the case for 64bit operands on a 32bit platform which use
seperate runtime functions to calculate division and modulo.

This CL restores the original optimizations that help on 32bit platforms
with negligible impact on 64bit platforms.

386:
name          old time/op  new time/op  delta
FormatInt-2   6.06Âµs Â± 0%  6.02Âµs Â± 0%  -0.70%  (p=0.000 n=20+20)
AppendInt-2   4.98Âµs Â± 0%  4.98Âµs Â± 0%    ~     (p=0.747 n=18+18)
FormatUint-2  1.93Âµs Â± 0%  1.85Âµs Â± 0%  -4.19%  (p=0.000 n=20+20)
AppendUint-2  1.71Âµs Â± 0%  1.64Âµs Â± 0%  -3.68%  (p=0.000 n=20+20)

amd64:
name          old time/op  new time/op  delta
FormatInt-2   2.41Âµs Â± 0%  2.41Âµs Â± 0%  -0.09%  (p=0.010 n=18+18)
AppendInt-2   1.77Âµs Â± 0%  1.77Âµs Â± 0%  +0.08%  (p=0.000 n=18+18)
FormatUint-2   653ns Â± 1%   653ns Â± 0%    ~     (p=0.178 n=20+20)
AppendUint-2   514ns Â± 0%   513ns Â± 0%  -0.13%  (p=0.000 n=20+17)

Change-Id: I574a18e54fb41b25fbe51ce696e7a8765abc79a6
Reviewed-on: https://go-review.googlesource.com/38051
Run-TryBot: Martin MÃ¶hrmann <moehrmann@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
---
 src/strconv/itoa.go | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/strconv/itoa.go b/src/strconv/itoa.go
index 0cbbf06594..d0b4258d76 100644
--- a/src/strconv/itoa.go
+++ b/src/strconv/itoa.go
@@ -80,11 +80,11 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s
 		if host32bit {
 			// convert the lower digits using 32bit operations
 			for u >= 1e9 {
-				// the compiler recognizes q = a/b; r = a%b
-				// and produces only one DIV instruction;
-				// no need to be clever here
+				// Avoid using r = a%b in addition to q = a/b
+				// since 64bit division and modulo operations
+				// are calculated by runtime functions on 32bit machines.
 				q := u / 1e9
-				us := uint(u % 1e9) // u % 1e9 fits into a uint
+				us := uint(u - q*1e9) // u % 1e9 fits into a uint
 				for j := 9; j > 0; j-- {
 					i--
 					a[i] = byte(us%10 + '0')
@@ -117,19 +117,22 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s
 		}
 		// u < base
 		i--
-		a[i] = digits[u]
-
+		a[i] = digits[uint(u)]
 	} else {
 		// general case
 		b := uint64(base)
 		for u >= b {
 			i--
-			a[i] = digits[u%b]
-			u /= b
+			// Avoid using r = a%b in addition to q = a/b
+			// since 64bit division and modulo operations
+			// are calculated by runtime functions on 32bit machines.
+			q := u / b
+			a[i] = digits[uint(u-q*b)]
+			u = q
 		}
 		// u < base
 		i--
-		a[i] = digits[u]
+		a[i] = digits[uint(u)]
 	}
 
 	// add sign, if any
-- 
2.52.0