From 6954be0baacd0f05edfd3015cc3ecfbf237b3967 Mon Sep 17 00:00:00 2001 From: Russ Cox Date: Mon, 24 Nov 2025 14:03:34 -0500 Subject: [PATCH] internal/strconv: delete ftoaryu MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit CL 700075 made this dead code. Benchmarks below for CL 700075, testing Dragonbox vs the old Ryu being deleted. The "Fixed" benchmarks are unchanged, which gives a sense of the noise level. benchmark \ host linux-amd64 s7 linux-arm64 local linux-386 s7:GOARCH=386 linux-arm vs base vs base vs base vs base vs base vs base vs base AppendFloat/Decimal -2.68% +2.76% +4.99% -7.44% +11.93% +10.51% +21.84% AppendFloat/Float -21.98% -13.32% -16.50% -11.54% -33.37% -28.66% -15.64% AppendFloat/Exp -32.44% -25.54% -28.85% -31.79% -39.60% -35.92% -20.89% AppendFloat/NegExp -33.31% -25.91% -28.90% -31.29% -41.17% -35.52% -21.32% AppendFloat/LongExp -19.35% -9.51% -15.29% -12.36% -30.46% -25.10% -10.18% AppendFloat/Big -24.40% -15.84% -22.56% -24.05% -43.23% -36.28% -26.45% AppendFloat/BinaryExp -0.52% -1.20% ~ ~ ~ +0.96% +1.94% AppendFloat/32Integer -14.24% -7.01% -12.82% -18.99% -12.12% -10.85% -0.32% AppendFloat/32ExactFraction -34.53% -28.47% -34.50% -30.50% -43.75% -38.73% -25.44% AppendFloat/32Point -25.83% -18.54% -23.52% -21.26% -36.74% -33.11% -20.72% AppendFloat/32Exp -37.55% -33.36% -37.74% -39.06% -51.37% -44.53% -31.76% AppendFloat/32NegExp -35.99% -31.96% -36.02% -37.13% -44.62% -39.03% -26.91% AppendFloat/32Shortest -23.25% -18.02% -21.41% -23.07% -35.56% -32.89% -20.13% AppendFloat/32Fixed8Hard +1.09% -1.94% ~ ~ -2.33% -1.36% -0.10% AppendFloat/32Fixed9Hard +1.45% -2.10% +0.10% ~ -4.20% -0.72% +1.31% AppendFloat/64Fixed1 +0.45% ~ ~ -1.66% -3.74% -2.13% ~ AppendFloat/64Fixed2 +0.32% -0.92% +0.53% -1.75% -2.69% ~ -0.49% AppendFloat/64Fixed2.5 +0.38% -0.38% ~ ~ -5.14% -1.15% -0.97% AppendFloat/64Fixed3 +0.97% -0.53% ~ +0.23% -3.57% -4.04% -0.27% AppendFloat/64Fixed4 +0.95% -2.77% +0.45% -1.57% -3.99% -2.58% -0.91% AppendFloat/64Fixed5Hard +0.52% -1.22% ~ -0.87% -3.20% -1.60% +0.49% AppendFloat/64Fixed12 +1.15% -0.62% ~ ~ -3.37% -1.43% -0.72% AppendFloat/64Fixed16 +1.13% ~ -0.21% -0.59% -3.65% ~ +0.74% AppendFloat/64Fixed12Hard +0.78% -1.26% ~ -0.95% -4.82% -2.98% +0.26% AppendFloat/64Fixed17Hard ~ ~ -0.32% -6.34% -2.44% -2.19% +1.00% AppendFloat/64Fixed18Hard ~ ~ ~ ~ ~ ~ +0.06% AppendFloat/64FixedF1 +0.44% ~ +0.43% -1.87% -2.75% ~ -1.24% AppendFloat/64FixedF2 +1.35% -1.04% +0.81% +1.26% -2.21% -2.36% ~ AppendFloat/64FixedF3 ~ -1.14% +0.39% -1.58% -3.46% ~ -1.08% AppendFloat/Slowpath64 -15.51% -7.05% -14.59% -7.86% -22.54% -19.63% -5.90% AppendFloat/SlowpathDenormal64 -15.10% -8.19% -14.62% -9.36% -26.86% -23.10% -14.48% host: linux-amd64 goos: linux goarch: amd64 pkg: internal/strconv cpu: Intel(R) Xeon(R) CPU @ 2.30GHz │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-16 63.37n ± 0% 61.67n ± 0% -2.68% (p=0.000 n=20) AppendFloat/Float-16 92.83n ± 0% 72.43n ± 0% -21.98% (p=0.000 n=20) AppendFloat/Exp-16 98.60n ± 0% 66.61n ± 0% -32.44% (p=0.000 n=20) AppendFloat/NegExp-16 100.15n ± 0% 66.79n ± 0% -33.31% (p=0.000 n=20) AppendFloat/LongExp-16 105.35n ± 0% 84.96n ± 0% -19.35% (p=0.000 n=20) AppendFloat/Big-16 108.50n ± 0% 82.03n ± 0% -24.40% (p=0.000 n=20) AppendFloat/BinaryExp-16 47.27n ± 0% 47.03n ± 0% -0.52% (p=0.000 n=20) AppendFloat/32Integer-16 63.29n ± 0% 54.28n ± 0% -14.24% (p=0.000 n=20) AppendFloat/32ExactFraction-16 89.72n ± 0% 58.74n ± 0% -34.53% (p=0.000 n=20) AppendFloat/32Point-16 87.32n ± 0% 64.77n ± 0% -25.83% (p=0.000 n=20) AppendFloat/32Exp-16 94.89n ± 0% 59.26n ± 0% -37.55% (p=0.000 n=20) AppendFloat/32NegExp-16 92.68n ± 0% 59.32n ± 0% -35.99% (p=0.000 n=20) AppendFloat/32Shortest-16 82.12n ± 0% 63.04n ± 0% -23.25% (p=0.000 n=20) AppendFloat/32Fixed8Hard-16 57.76n ± 0% 58.38n ± 0% +1.09% (p=0.000 n=20) AppendFloat/32Fixed9Hard-16 66.44n ± 0% 67.41n ± 0% +1.45% (p=0.000 n=20) AppendFloat/64Fixed1-16 51.00n ± 0% 51.24n ± 0% +0.45% (p=0.000 n=20) AppendFloat/64Fixed2-16 50.86n ± 0% 51.03n ± 0% +0.32% (p=0.000 n=20) AppendFloat/64Fixed2.5-16 49.31n ± 0% 49.49n ± 0% +0.38% (p=0.000 n=20) AppendFloat/64Fixed3-16 51.98n ± 0% 52.48n ± 0% +0.97% (p=0.000 n=20) AppendFloat/64Fixed4-16 50.05n ± 0% 50.52n ± 0% +0.95% (p=0.000 n=20) AppendFloat/64Fixed5Hard-16 58.01n ± 0% 58.31n ± 0% +0.52% (p=0.000 n=20) AppendFloat/64Fixed12-16 82.81n ± 0% 83.77n ± 0% +1.15% (p=0.000 n=20) AppendFloat/64Fixed16-16 70.66n ± 0% 71.46n ± 0% +1.13% (p=0.000 n=20) AppendFloat/64Fixed12Hard-16 68.25n ± 0% 68.79n ± 0% +0.78% (p=0.000 n=20) AppendFloat/64Fixed17Hard-16 79.78n ± 0% 79.82n ± 0% ~ (p=0.136 n=20) AppendFloat/64Fixed18Hard-16 4.881µ ± 0% 4.876µ ± 0% ~ (p=0.432 n=20) AppendFloat/64FixedF1-16 68.74n ± 0% 69.04n ± 0% +0.44% (p=0.000 n=20) AppendFloat/64FixedF2-16 57.36n ± 0% 58.13n ± 0% +1.35% (p=0.000 n=20) AppendFloat/64FixedF3-16 52.59n ± 0% 52.77n ± 0% ~ (p=0.001 n=20) AppendFloat/Slowpath64-16 99.56n ± 0% 84.12n ± 0% -15.51% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-16 97.35n ± 0% 82.65n ± 0% -15.10% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-16 56.27n ± 0% AppendFloat/ShorterIntervalCase64-16 57.42n ± 0% geomean 82.53n 71.80n -11.68% host: s7 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-32 22.30n ± 0% 22.91n ± 0% +2.76% (p=0.000 n=20) AppendFloat/Float-32 34.54n ± 0% 29.94n ± 0% -13.32% (p=0.000 n=20) AppendFloat/Exp-32 34.55n ± 0% 25.72n ± 0% -25.54% (p=0.000 n=20) AppendFloat/NegExp-32 35.08n ± 0% 25.99n ± 1% -25.91% (p=0.000 n=20) AppendFloat/LongExp-32 36.85n ± 0% 33.35n ± 1% -9.51% (p=0.000 n=20) AppendFloat/Big-32 38.28n ± 0% 32.21n ± 1% -15.84% (p=0.000 n=20) AppendFloat/BinaryExp-32 17.52n ± 0% 17.30n ± 0% -1.20% (p=0.000 n=20) AppendFloat/32Integer-32 22.31n ± 0% 20.75n ± 0% -7.01% (p=0.000 n=20) AppendFloat/32ExactFraction-32 32.74n ± 1% 23.41n ± 1% -28.47% (p=0.000 n=20) AppendFloat/32Point-32 32.88n ± 0% 26.79n ± 0% -18.54% (p=0.000 n=20) AppendFloat/32Exp-32 34.10n ± 0% 22.72n ± 1% -33.36% (p=0.000 n=20) AppendFloat/32NegExp-32 33.17n ± 1% 22.57n ± 0% -31.96% (p=0.000 n=20) AppendFloat/32Shortest-32 29.85n ± 1% 24.47n ± 0% -18.02% (p=0.000 n=20) AppendFloat/32Fixed8Hard-32 22.62n ± 1% 22.19n ± 1% -1.94% (p=0.000 n=20) AppendFloat/32Fixed9Hard-32 25.75n ± 1% 25.21n ± 0% -2.10% (p=0.000 n=20) AppendFloat/64Fixed1-32 19.02n ± 1% 18.98n ± 0% ~ (p=0.351 n=20) AppendFloat/64Fixed2-32 18.94n ± 0% 18.76n ± 0% -0.92% (p=0.000 n=20) AppendFloat/64Fixed2.5-32 18.23n ± 0% 18.16n ± 0% -0.38% (p=0.001 n=20) AppendFloat/64Fixed3-32 19.79n ± 0% 19.68n ± 0% -0.53% (p=0.000 n=20) AppendFloat/64Fixed4-32 18.93n ± 0% 18.40n ± 1% -2.77% (p=0.000 n=20) AppendFloat/64Fixed5Hard-32 21.81n ± 0% 21.54n ± 1% -1.22% (p=0.000 n=20) AppendFloat/64Fixed12-32 30.58n ± 1% 30.39n ± 0% -0.62% (p=0.000 n=20) AppendFloat/64Fixed16-32 26.98n ± 1% 26.80n ± 1% ~ (p=0.010 n=20) AppendFloat/64Fixed12Hard-32 26.20n ± 0% 25.86n ± 1% -1.26% (p=0.000 n=20) AppendFloat/64Fixed17Hard-32 30.01n ± 1% 30.10n ± 1% ~ (p=0.112 n=20) AppendFloat/64Fixed18Hard-32 1.809µ ± 1% 1.806µ ± 0% ~ (p=0.713 n=20) AppendFloat/64FixedF1-32 26.78n ± 1% 26.59n ± 0% ~ (p=0.005 n=20) AppendFloat/64FixedF2-32 20.24n ± 1% 20.03n ± 0% -1.04% (p=0.000 n=20) AppendFloat/64FixedF3-32 18.88n ± 0% 18.67n ± 0% -1.14% (p=0.000 n=20) AppendFloat/Slowpath64-32 35.37n ± 0% 32.88n ± 1% -7.05% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-32 35.17n ± 0% 32.29n ± 1% -8.19% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-32 21.76n ± 0% AppendFloat/ShorterIntervalCase64-32 22.11n ± 0% geomean 30.34n 27.23n -8.96% host: linux-arm64 goarch: arm64 cpu: unknown │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-8 60.08n ± 0% 63.07n ± 0% +4.99% (p=0.000 n=20) AppendFloat/Float-8 88.53n ± 0% 73.92n ± 0% -16.50% (p=0.000 n=20) AppendFloat/Exp-8 93.07n ± 0% 66.22n ± 0% -28.85% (p=0.000 n=20) AppendFloat/NegExp-8 93.35n ± 0% 66.38n ± 0% -28.90% (p=0.000 n=20) AppendFloat/LongExp-8 100.15n ± 0% 84.84n ± 0% -15.29% (p=0.000 n=20) AppendFloat/Big-8 103.80n ± 0% 80.38n ± 0% -22.56% (p=0.000 n=20) AppendFloat/BinaryExp-8 47.36n ± 0% 47.34n ± 0% ~ (p=0.033 n=20) AppendFloat/32Integer-8 60.28n ± 0% 52.55n ± 0% -12.82% (p=0.000 n=20) AppendFloat/32ExactFraction-8 86.11n ± 0% 56.40n ± 0% -34.50% (p=0.000 n=20) AppendFloat/32Point-8 82.88n ± 0% 63.39n ± 0% -23.52% (p=0.000 n=20) AppendFloat/32Exp-8 89.33n ± 0% 55.62n ± 0% -37.74% (p=0.000 n=20) AppendFloat/32NegExp-8 87.48n ± 0% 55.97n ± 0% -36.02% (p=0.000 n=20) AppendFloat/32Shortest-8 76.31n ± 0% 59.97n ± 0% -21.41% (p=0.000 n=20) AppendFloat/32Fixed8Hard-8 52.83n ± 0% 52.82n ± 0% ~ (p=0.370 n=20) AppendFloat/32Fixed9Hard-8 60.90n ± 0% 60.96n ± 0% +0.10% (p=0.000 n=20) AppendFloat/64Fixed1-8 46.96n ± 0% 46.95n ± 0% ~ (p=0.702 n=20) AppendFloat/64Fixed2-8 46.96n ± 0% 47.21n ± 0% +0.53% (p=0.000 n=20) AppendFloat/64Fixed2.5-8 44.24n ± 0% 44.29n ± 0% ~ (p=0.006 n=20) AppendFloat/64Fixed3-8 47.73n ± 0% 47.78n ± 0% ~ (p=0.020 n=20) AppendFloat/64Fixed4-8 44.40n ± 0% 44.60n ± 0% +0.45% (p=0.000 n=20) AppendFloat/64Fixed5Hard-8 52.52n ± 0% 52.50n ± 0% ~ (p=0.722 n=20) AppendFloat/64Fixed12-8 78.57n ± 0% 78.56n ± 0% ~ (p=0.222 n=20) AppendFloat/64Fixed16-8 65.36n ± 0% 65.22n ± 0% -0.21% (p=0.000 n=20) AppendFloat/64Fixed12Hard-8 62.04n ± 0% 61.97n ± 0% ~ (p=0.004 n=20) AppendFloat/64Fixed17Hard-8 74.30n ± 0% 74.06n ± 0% -0.32% (p=0.000 n=20) AppendFloat/64Fixed18Hard-8 4.282µ ± 0% 4.284µ ± 0% ~ (p=0.296 n=20) AppendFloat/64FixedF1-8 66.05n ± 0% 66.33n ± 0% +0.43% (p=0.000 n=20) AppendFloat/64FixedF2-8 53.67n ± 0% 54.11n ± 0% +0.81% (p=0.000 n=20) AppendFloat/64FixedF3-8 47.41n ± 0% 47.59n ± 0% +0.39% (p=0.000 n=20) AppendFloat/Slowpath64-8 97.42n ± 0% 83.21n ± 0% -14.59% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-8 94.74n ± 0% 80.88n ± 0% -14.62% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-8 53.77n ± 0% AppendFloat/ShorterIntervalCase64-8 55.22n ± 0% geomean 77.14n 67.89n -10.73% host: local goos: darwin cpu: Apple M3 Pro │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-12 21.09n ± 0% 19.52n ± 0% -7.44% (p=0.000 n=20) AppendFloat/Float-12 32.36n ± 0% 28.63n ± 1% -11.54% (p=0.000 n=20) AppendFloat/Exp-12 31.77n ± 0% 21.67n ± 0% -31.79% (p=0.000 n=20) AppendFloat/NegExp-12 31.56n ± 1% 21.68n ± 0% -31.29% (p=0.000 n=20) AppendFloat/LongExp-12 33.33n ± 0% 29.21n ± 0% -12.36% (p=0.000 n=20) AppendFloat/Big-12 35.24n ± 1% 26.77n ± 0% -24.05% (p=0.000 n=20) AppendFloat/BinaryExp-12 18.88n ± 1% 19.38n ± 2% ~ (p=0.031 n=20) AppendFloat/32Integer-12 21.32n ± 1% 17.27n ± 0% -18.99% (p=0.000 n=20) AppendFloat/32ExactFraction-12 30.85n ± 1% 21.44n ± 0% -30.50% (p=0.000 n=20) AppendFloat/32Point-12 31.02n ± 1% 24.42n ± 0% -21.26% (p=0.000 n=20) AppendFloat/32Exp-12 31.55n ± 0% 19.23n ± 0% -39.06% (p=0.000 n=20) AppendFloat/32NegExp-12 30.32n ± 1% 19.06n ± 0% -37.13% (p=0.000 n=20) AppendFloat/32Shortest-12 26.68n ± 0% 20.52n ± 0% -23.07% (p=0.000 n=20) AppendFloat/32Fixed8Hard-12 17.34n ± 1% 17.24n ± 0% ~ (p=0.017 n=20) AppendFloat/32Fixed9Hard-12 19.05n ± 1% 19.25n ± 1% ~ (p=0.155 n=20) AppendFloat/64Fixed1-12 15.66n ± 0% 15.40n ± 0% -1.66% (p=0.000 n=20) AppendFloat/64Fixed2-12 15.39n ± 0% 15.12n ± 0% -1.75% (p=0.000 n=20) AppendFloat/64Fixed2.5-12 15.14n ± 0% 15.14n ± 0% ~ (p=0.645 n=20) AppendFloat/64Fixed3-12 15.53n ± 0% 15.56n ± 0% +0.23% (p=0.000 n=20) AppendFloat/64Fixed4-12 15.28n ± 0% 15.04n ± 0% -1.57% (p=0.000 n=20) AppendFloat/64Fixed5Hard-12 18.32n ± 0% 18.16n ± 0% -0.87% (p=0.000 n=20) AppendFloat/64Fixed12-12 25.51n ± 1% 25.48n ± 0% ~ (p=0.256 n=20) AppendFloat/64Fixed16-12 21.32n ± 0% 21.20n ± 0% -0.59% (p=0.000 n=20) AppendFloat/64Fixed12Hard-12 21.11n ± 1% 20.91n ± 1% -0.95% (p=0.001 n=20) AppendFloat/64Fixed17Hard-12 26.89n ± 1% 25.18n ± 3% -6.34% (p=0.000 n=20) AppendFloat/64Fixed18Hard-12 2.057µ ± 6% 2.065µ ± 1% ~ (p=0.856 n=20) AppendFloat/64FixedF1-12 24.65n ± 0% 24.19n ± 0% -1.87% (p=0.000 n=20) AppendFloat/64FixedF2-12 20.68n ± 0% 20.94n ± 0% +1.26% (p=0.000 n=20) AppendFloat/64FixedF3-12 16.44n ± 0% 16.18n ± 0% -1.58% (p=0.000 n=20) AppendFloat/Slowpath64-12 31.68n ± 0% 29.18n ± 0% -7.86% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-12 29.92n ± 1% 27.12n ± 0% -9.36% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-12 18.44n ± 1% AppendFloat/ShorterIntervalCase64-12 18.57n ± 0% geomean 26.90n 23.50n -11.27% host: linux-386 goos: linux goarch: 386 cpu: Intel(R) Xeon(R) CPU @ 2.30GHz │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-16 128.2n ± 0% 143.5n ± 0% +11.93% (p=0.000 n=20) AppendFloat/Float-16 236.3n ± 0% 157.5n ± 0% -33.37% (p=0.000 n=20) AppendFloat/Exp-16 245.3n ± 0% 148.2n ± 0% -39.60% (p=0.000 n=20) AppendFloat/NegExp-16 251.2n ± 0% 147.8n ± 0% -41.17% (p=0.000 n=20) AppendFloat/LongExp-16 253.2n ± 0% 176.0n ± 0% -30.46% (p=0.000 n=20) AppendFloat/Big-16 278.6n ± 0% 158.1n ± 0% -43.23% (p=0.000 n=20) AppendFloat/BinaryExp-16 89.72n ± 0% 89.47n ± 0% ~ (p=0.155 n=20) AppendFloat/32Integer-16 127.1n ± 0% 111.7n ± 0% -12.12% (p=0.000 n=20) AppendFloat/32ExactFraction-16 206.9n ± 1% 116.3n ± 1% -43.75% (p=0.000 n=20) AppendFloat/32Point-16 196.9n ± 0% 124.5n ± 1% -36.74% (p=0.000 n=20) AppendFloat/32Exp-16 235.1n ± 1% 114.3n ± 0% -51.37% (p=0.000 n=20) AppendFloat/32NegExp-16 206.4n ± 0% 114.3n ± 1% -44.62% (p=0.000 n=20) AppendFloat/32Shortest-16 189.7n ± 0% 122.3n ± 0% -35.56% (p=0.000 n=20) AppendFloat/32Fixed8Hard-16 137.2n ± 0% 134.0n ± 0% -2.33% (p=0.000 n=20) AppendFloat/32Fixed9Hard-16 160.8n ± 0% 154.0n ± 0% -4.20% (p=0.000 n=20) AppendFloat/64Fixed1-16 140.2n ± 0% 135.0n ± 0% -3.74% (p=0.000 n=20) AppendFloat/64Fixed2-16 135.5n ± 0% 131.8n ± 0% -2.69% (p=0.000 n=20) AppendFloat/64Fixed2.5-16 133.3n ± 0% 126.5n ± 0% -5.14% (p=0.000 n=20) AppendFloat/64Fixed3-16 135.8n ± 0% 130.9n ± 0% -3.57% (p=0.000 n=20) AppendFloat/64Fixed4-16 127.9n ± 0% 122.8n ± 0% -3.99% (p=0.000 n=20) AppendFloat/64Fixed5Hard-16 140.7n ± 0% 136.2n ± 0% -3.20% (p=0.000 n=20) AppendFloat/64Fixed12-16 166.1n ± 0% 160.5n ± 0% -3.37% (p=0.000 n=20) AppendFloat/64Fixed16-16 160.1n ± 0% 154.2n ± 0% -3.65% (p=0.000 n=20) AppendFloat/64Fixed12Hard-16 156.6n ± 0% 149.0n ± 0% -4.82% (p=0.000 n=20) AppendFloat/64Fixed17Hard-16 173.9n ± 1% 169.6n ± 0% -2.44% (p=0.000 n=20) AppendFloat/64Fixed18Hard-16 10.59µ ± 1% 10.60µ ± 0% ~ (p=0.664 n=20) AppendFloat/64FixedF1-16 158.5n ± 0% 154.1n ± 0% -2.75% (p=0.000 n=20) AppendFloat/64FixedF2-16 147.1n ± 0% 143.8n ± 0% -2.21% (p=0.000 n=20) AppendFloat/64FixedF3-16 135.8n ± 0% 131.1n ± 0% -3.46% (p=0.000 n=20) AppendFloat/Slowpath64-16 244.9n ± 0% 189.7n ± 0% -22.54% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-16 241.8n ± 0% 176.9n ± 0% -26.86% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-16 114.9n ± 0% AppendFloat/ShorterIntervalCase64-16 130.6n ± 0% geomean 195.7n 157.4n -18.30% host: s7:GOARCH=386 cpu: AMD Ryzen 9 7950X 16-Core Processor │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-32 42.76n ± 0% 47.25n ± 0% +10.51% (p=0.000 n=20) AppendFloat/Float-32 71.44n ± 1% 50.97n ± 0% -28.66% (p=0.000 n=20) AppendFloat/Exp-32 75.51n ± 0% 48.39n ± 1% -35.92% (p=0.000 n=20) AppendFloat/NegExp-32 74.70n ± 0% 48.17n ± 1% -35.52% (p=0.000 n=20) AppendFloat/LongExp-32 76.52n ± 0% 57.32n ± 1% -25.10% (p=0.000 n=20) AppendFloat/Big-32 83.05n ± 0% 52.92n ± 1% -36.28% (p=0.000 n=20) AppendFloat/BinaryExp-32 31.92n ± 1% 32.22n ± 0% +0.96% (p=0.000 n=20) AppendFloat/32Integer-32 41.29n ± 1% 36.81n ± 0% -10.85% (p=0.000 n=20) AppendFloat/32ExactFraction-32 62.29n ± 1% 38.16n ± 0% -38.73% (p=0.000 n=20) AppendFloat/32Point-32 60.45n ± 1% 40.44n ± 1% -33.11% (p=0.000 n=20) AppendFloat/32Exp-32 69.32n ± 1% 38.45n ± 1% -44.53% (p=0.000 n=20) AppendFloat/32NegExp-32 63.39n ± 0% 38.64n ± 1% -39.03% (p=0.000 n=20) AppendFloat/32Shortest-32 58.90n ± 1% 39.53n ± 0% -32.89% (p=0.000 n=20) AppendFloat/32Fixed8Hard-32 43.30n ± 0% 42.70n ± 1% -1.36% (p=0.000 n=20) AppendFloat/32Fixed9Hard-32 49.96n ± 1% 49.60n ± 0% -0.72% (p=0.000 n=20) AppendFloat/64Fixed1-32 42.99n ± 1% 42.08n ± 0% -2.13% (p=0.000 n=20) AppendFloat/64Fixed2-32 41.58n ± 0% 41.42n ± 1% ~ (p=0.077 n=20) AppendFloat/64Fixed2.5-32 40.47n ± 1% 40.00n ± 1% -1.15% (p=0.000 n=20) AppendFloat/64Fixed3-32 43.43n ± 1% 41.67n ± 0% -4.04% (p=0.000 n=20) AppendFloat/64Fixed4-32 40.44n ± 0% 39.40n ± 0% -2.58% (p=0.000 n=20) AppendFloat/64Fixed5Hard-32 43.41n ± 0% 42.72n ± 0% -1.60% (p=0.000 n=20) AppendFloat/64Fixed12-32 52.00n ± 0% 51.26n ± 0% -1.43% (p=0.000 n=20) AppendFloat/64Fixed16-32 50.62n ± 1% 50.55n ± 0% ~ (p=0.234 n=20) AppendFloat/64Fixed12Hard-32 49.36n ± 0% 47.89n ± 0% -2.98% (p=0.000 n=20) AppendFloat/64Fixed17Hard-32 56.91n ± 0% 55.66n ± 1% -2.19% (p=0.000 n=20) AppendFloat/64Fixed18Hard-32 3.983µ ± 0% 3.964µ ± 0% ~ (p=0.014 n=20) AppendFloat/64FixedF1-32 49.31n ± 1% 49.10n ± 1% ~ (p=0.005 n=20) AppendFloat/64FixedF2-32 45.06n ± 0% 44.00n ± 1% -2.36% (p=0.000 n=20) AppendFloat/64FixedF3-32 42.22n ± 0% 42.20n ± 1% ~ (p=0.644 n=20) AppendFloat/Slowpath64-32 75.77n ± 0% 60.89n ± 1% -19.63% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-32 74.88n ± 1% 57.59n ± 1% -23.10% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-32 37.66n ± 1% AppendFloat/ShorterIntervalCase64-32 42.49n ± 1% geomean 61.34n 51.27n -15.08% host: linux-arm goarch: arm cpu: ARMv8 Processor rev 1 (v8l) │ 3c26aef8fba │ 8a958b0d9c1 │ │ sec/op │ sec/op vs base │ AppendFloat/Decimal-4 110.8n ± 0% 135.0n ± 0% +21.84% (p=0.000 n=20) AppendFloat/Float-4 172.0n ± 0% 145.1n ± 0% -15.64% (p=0.000 n=20) AppendFloat/Exp-4 172.1n ± 0% 136.2n ± 0% -20.89% (p=0.000 n=20) AppendFloat/NegExp-4 172.6n ± 0% 135.8n ± 0% -21.32% (p=0.000 n=20) AppendFloat/LongExp-4 180.2n ± 0% 161.9n ± 0% -10.18% (p=0.000 n=20) AppendFloat/Big-4 195.5n ± 0% 143.8n ± 0% -26.45% (p=0.000 n=20) AppendFloat/BinaryExp-4 84.75n ± 0% 86.40n ± 0% +1.94% (p=0.000 n=20) AppendFloat/32Integer-4 110.4n ± 0% 110.0n ± 0% -0.32% (p=0.000 n=20) AppendFloat/32ExactFraction-4 152.9n ± 0% 114.0n ± 0% -25.44% (p=0.000 n=20) AppendFloat/32Point-4 151.5n ± 0% 120.1n ± 0% -20.72% (p=0.000 n=20) AppendFloat/32Exp-4 163.1n ± 0% 111.3n ± 0% -31.76% (p=0.000 n=20) AppendFloat/32NegExp-4 152.0n ± 0% 111.1n ± 0% -26.91% (p=0.000 n=20) AppendFloat/32Shortest-4 145.8n ± 0% 116.5n ± 0% -20.13% (p=0.000 n=20) AppendFloat/32Fixed8Hard-4 104.1n ± 0% 104.0n ± 0% -0.10% (p=0.000 n=20) AppendFloat/32Fixed9Hard-4 114.2n ± 0% 115.7n ± 0% +1.31% (p=0.000 n=20) AppendFloat/64Fixed1-4 97.35n ± 0% 97.31n ± 0% ~ (p=0.357 n=20) AppendFloat/64Fixed2-4 95.74n ± 0% 95.28n ± 0% -0.49% (p=0.000 n=20) AppendFloat/64Fixed2.5-4 94.24n ± 0% 93.32n ± 0% -0.97% (p=0.000 n=20) AppendFloat/64Fixed3-4 95.56n ± 0% 95.30n ± 0% -0.27% (p=0.000 n=20) AppendFloat/64Fixed4-4 92.36n ± 0% 91.52n ± 0% -0.91% (p=0.000 n=20) AppendFloat/64Fixed5Hard-4 101.5n ± 0% 102.0n ± 0% +0.49% (p=0.000 n=20) AppendFloat/64Fixed12-4 125.5n ± 0% 124.6n ± 0% -0.72% (p=0.000 n=20) AppendFloat/64Fixed16-4 121.8n ± 0% 122.7n ± 0% +0.74% (p=0.000 n=20) AppendFloat/64Fixed12Hard-4 116.1n ± 0% 116.4n ± 0% +0.26% (p=0.000 n=20) AppendFloat/64Fixed17Hard-4 129.8n ± 0% 131.1n ± 0% +1.00% (p=0.000 n=20) AppendFloat/64Fixed18Hard-4 7.945µ ± 0% 7.950µ ± 0% +0.06% (p=0.000 n=20) AppendFloat/64FixedF1-4 112.8n ± 0% 111.4n ± 0% -1.24% (p=0.000 n=20) AppendFloat/64FixedF2-4 100.6n ± 0% 100.5n ± 0% ~ (p=0.066 n=20) AppendFloat/64FixedF3-4 96.45n ± 0% 95.41n ± 0% -1.08% (p=0.000 n=20) AppendFloat/Slowpath64-4 176.3n ± 0% 165.9n ± 0% -5.90% (p=0.000 n=20) AppendFloat/SlowpathDenormal64-4 178.2n ± 0% 152.4n ± 0% -14.48% (p=0.000 n=20) AppendFloat/ShorterIntervalCase32-4 112.8n ± 0% AppendFloat/ShorterIntervalCase64-4 119.0n ± 0% geomean 144.6n 132.1n -7.84% Change-Id: I1eb3c7b8756ad6cf938bc9b81180e01fd8a4cd9e Reviewed-on: https://go-review.googlesource.com/c/go/+/723861 Reviewed-by: Jorropo Reviewed-by: Alan Donovan LUCI-TryBot-Result: Go LUCI Auto-Submit: Russ Cox --- src/internal/strconv/ftoaryu.go | 307 -------------------------------- 1 file changed, 307 deletions(-) delete mode 100644 src/internal/strconv/ftoaryu.go diff --git a/src/internal/strconv/ftoaryu.go b/src/internal/strconv/ftoaryu.go deleted file mode 100644 index 9407bfec44..0000000000 --- a/src/internal/strconv/ftoaryu.go +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package strconv - -import "math/bits" - -// binary to decimal conversion using the Ryū algorithm. -// -// See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369) - -// ryuFtoaShortest formats mant*2^exp with prec decimal digits. -func ryuFtoaShortest(d *decimalSlice, mant uint64, exp int, flt *floatInfo) { - if mant == 0 { - d.nd, d.dp = 0, 0 - return - } - // If input is an exact integer with fewer bits than the mantissa, - // the previous and next integer are not admissible representations. - if exp <= 0 && bits.TrailingZeros64(mant) >= -exp { - mant >>= uint(-exp) - ryuDigits(d, mant, mant, mant, true, false) - return - } - ml, mc, mu, e2 := computeBounds(mant, exp, flt) - if e2 == 0 { - ryuDigits(d, ml, mc, mu, true, false) - return - } - // Find 10^q *larger* than 2^-e2 - q := mulLog10_2(-e2) + 1 - - // We are going to multiply by 10^q using 128-bit arithmetic. - // The exponent is the same for all 3 numbers. - var dl, dc, du uint64 - var dl0, dc0, du0 bool - if flt == &float32info { - var dl32, dc32, du32 uint32 - dl32, _, dl0 = mult64bitPow10(uint32(ml), e2, q) - dc32, _, dc0 = mult64bitPow10(uint32(mc), e2, q) - du32, e2, du0 = mult64bitPow10(uint32(mu), e2, q) - dl, dc, du = uint64(dl32), uint64(dc32), uint64(du32) - } else { - dl, _, dl0 = mult128bitPow10(ml, e2, q) - dc, _, dc0 = mult128bitPow10(mc, e2, q) - du, e2, du0 = mult128bitPow10(mu, e2, q) - } - if e2 >= 0 { - panic("not enough significant bits after mult128bitPow10") - } - // Is it an exact computation? - if q > 55 { - // Large positive powers of ten are not exact - dl0, dc0, du0 = false, false, false - } - if q < 0 && q >= -24 { - // Division by a power of ten may be exact. - // (note that 5^25 is a 59-bit number so division by 5^25 is never exact). - if divisiblePow5(ml, -q) { - dl0 = true - } - if divisiblePow5(mc, -q) { - dc0 = true - } - if divisiblePow5(mu, -q) { - du0 = true - } - } - // Express the results (dl, dc, du)*2^e2 as integers. - // Extra bits must be removed and rounding hints computed. - extra := uint(-e2) - extraMask := uint64(1<>extra, dl&extraMask - dc, fracc := dc>>extra, dc&extraMask - du, fracu := du>>extra, du&extraMask - // Is it allowed to use 'du' as a result? - // It is always allowed when it is truncated, but also - // if it is exact and the original binary mantissa is even - // When disallowed, we can subtract 1. - uok := !du0 || fracu > 0 - if du0 && fracu == 0 { - uok = mant&1 == 0 - } - if !uok { - du-- - } - // Is 'dc' the correctly rounded base 10 mantissa? - // The correct rounding might be dc+1 - cup := false // don't round up. - if dc0 { - // If we computed an exact product, the half integer - // should round to next (even) integer if 'dc' is odd. - cup = fracc > 1<<(extra-1) || - (fracc == 1<<(extra-1) && dc&1 == 1) - } else { - // otherwise, the result is a lower truncation of the ideal - // result. - cup = fracc>>(extra-1) == 1 - } - // Is 'dl' an allowed representation? - // Only if it is an exact value, and if the original binary mantissa - // was even. - lok := dl0 && fracl == 0 && (mant&1 == 0) - if !lok { - dl++ - } - // We need to remember whether the trimmed digits of 'dc' are zero. - c0 := dc0 && fracc == 0 - // render digits - ryuDigits(d, dl, dc, du, c0, cup) - d.dp -= q -} - -// computeBounds returns a floating-point vector (l, c, u)×2^e2 -// where the mantissas are 55-bit (or 26-bit) integers, describing the interval -// represented by the input float64 or float32. -func computeBounds(mant uint64, exp int, flt *floatInfo) (lower, central, upper uint64, e2 int) { - if mant != 1< 5e8) || (clo == 5e8 && cup) - ryuDigits32(d, lhi, chi, uhi, c0, cup, 8) - d.dp += 9 - } else { - d.nd = 0 - // emit high part - n := uint(9) - for v := chi; v > 0; { - v1, v2 := v/10, v%10 - v = v1 - n-- - d.d[n] = byte(v2 + '0') - } - d.d = d.d[n:] - d.nd = int(9 - n) - // emit low part - ryuDigits32(d, llo, clo, ulo, - c0, cup, d.nd+8) - } - // trim trailing zeros - for d.nd > 0 && d.d[d.nd-1] == '0' { - d.nd-- - } - // trim initial zeros - for d.nd > 0 && d.d[0] == '0' { - d.nd-- - d.dp-- - d.d = d.d[1:] - } -} - -// ryuDigits32 emits decimal digits for a number less than 1e9. -func ryuDigits32(d *decimalSlice, lower, central, upper uint32, - c0, cup bool, endindex int) { - if upper == 0 { - d.dp = endindex + 1 - return - } - trimmed := 0 - // Remember last trimmed digit to check for round-up. - // c0 will be used to remember zeroness of following digits. - cNextDigit := 0 - for upper > 0 { - // Repeatedly compute: - // l = Ceil(lower / 10^k) - // c = Round(central / 10^k) - // u = Floor(upper / 10^k) - // and stop when c goes out of the (l, u) interval. - l := (lower + 9) / 10 - c, cdigit := central/10, central%10 - u := upper / 10 - if l > u { - // don't trim the last digit as it is forbidden to go below l - // other, trim and exit now. - break - } - // Check that we didn't cross the lower boundary. - // The case where l < u but c == l-1 is essentially impossible, - // but may happen if: - // lower = ..11 - // central = ..19 - // upper = ..31 - // and means that 'central' is very close but less than - // an integer ending with many zeros, and usually - // the "round-up" logic hides the problem. - if l == c+1 && c < u { - c++ - cdigit = 0 - cup = false - } - trimmed++ - // Remember trimmed digits of c - c0 = c0 && cNextDigit == 0 - cNextDigit = int(cdigit) - lower, central, upper = l, c, u - } - // should we round up? - if trimmed > 0 { - cup = cNextDigit > 5 || - (cNextDigit == 5 && !c0) || - (cNextDigit == 5 && c0 && central&1 == 1) - } - if central < upper && cup { - central++ - } - // We know where the number ends, fill directly - endindex -= trimmed - v := central - n := endindex - for n > d.nd { - v1, v2 := v/100, v%100 - d.d[n] = smalls[2*v2+1] - d.d[n-1] = smalls[2*v2+0] - n -= 2 - v = v1 - } - if n == d.nd { - d.d[n] = byte(v + '0') - } - d.nd = endindex + 1 - d.dp = d.nd + trimmed -} - -// mult64bitPow10 takes a floating-point input with a 25-bit -// mantissa and multiplies it with 10^q. The resulting mantissa -// is m*P >> 57 where P is a 64-bit truncated power of 10. -// It is typically 31 or 32-bit wide. -// The returned boolean is true if all trimmed bits were zero. -// -// That is: -// -// m*2^e2 * round(10^q) = resM * 2^resE + ε -// exact = ε == 0 -func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) { - if q == 0 { - // P == 1<<63 - return m << 6, e2 - 6, true - } - pow, exp2, ok := pow10(q) - if !ok { - // This never happens due to the range of float32/float64 exponent - panic("mult64bitPow10: power of 10 is out of range") - } - if q < 0 { - // Inverse powers of ten must be rounded up. - pow.Hi++ - } - hi, lo := bits.Mul64(uint64(m), pow.Hi) - e2 += exp2 - 64 + 57 - return uint32(hi<<7 | lo>>57), e2, lo<<7 == 0 -} - -// mult128bitPow10 takes a floating-point input with a 55-bit -// mantissa and multiplies it with 10^q. The resulting mantissa -// is m*P >> 119 where P is a 128-bit truncated power of 10. -// It is typically 63 or 64-bit wide. -// The returned boolean is true is all trimmed bits were zero. -// -// That is: -// -// m*2^e2 * round(10^q) = resM * 2^resE + ε -// exact = ε == 0 -func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) { - if q == 0 { - // P == 1<<127 - return m << 8, e2 - 8, true - } - pow, exp2, ok := pow10(q) - if !ok { - // This never happens due to the range of float32/float64 exponent - panic("mult128bitPow10: power of 10 is out of range") - } - if q < 0 { - // Inverse powers of ten must be rounded up. - pow.Lo++ - } - e2 += exp2 - 128 + 119 - - hi, mid, lo := umul192(m, pow) - return hi<<9 | mid>>55, e2, mid<<9 == 0 && lo == 0 -} -- 2.52.0