]> Cypherpunks repositories - gostls13.git/commitdiff
internal/bytealg: fix alignment code in compare_riscv64.s
authorMark Ryan <markdryan@rivosinc.com>
Wed, 24 May 2023 11:20:11 +0000 (13:20 +0200)
committerKeith Randall <khr@google.com>
Tue, 30 May 2023 16:05:30 +0000 (16:05 +0000)
The riscv64 implementation of compare has an optimization that is
applied when both pointers share the same alignment but that alignment
is not 8 bytes.  In this case it tries to align both pointers to an 8 byte boundaries,
by individually comparing the first few bytes of each buffer.  Unfortunately,
the existing code is incorrect.  It adjusts the pointers by the wrong number
of bytes resulting, in most cases, in pointers that are not 8 byte aligned.

This commit fixes the issue by individually comparing the first
(8 - (pointer & 7)) bytes of each buffer rather than the first
(pointer & 7) bytes.

We also remove an unnecessary immediate MOV instruction.

This particular optimization is not covered by any of the existing
benchmarks so a new benchmark, benchmarkCompareBytesBigBothUnaligned,
is provided.  The benchmark tests the case where both pointers have
the same alignment but may not be 8 byte aligned.  Results of the
new benchmark along with some of the existing benchmarks generated on
a SiFive HiFive Unmatched A00 with 16GB of RAM running Ubuntu 23.04
are presented below.

CompareBytesEqual-4                           70.00n ±  6%   68.32n ±  0%   -2.40% (p=0.020 n=10)
CompareBytesToNil-4                           19.31n ±  0%   18.47n ±  0%   -4.35% (p=0.000 n=10)
CompareBytesEmpty-4                           16.79n ±  0%   15.95n ±  0%   -4.97% (p=0.000 n=10)
CompareBytesIdentical-4                       19.94n ± 15%   18.32n ± 13%   -8.15% (p=0.040 n=10)
CompareBytesSameLength-4                      37.93n ±  0%   42.44n ±  1%  +11.91% (p=0.000 n=10)
CompareBytesDifferentLength-4                 37.93n ±  0%   42.44n ±  0%  +11.89% (p=0.000 n=10)
CompareBytesBigUnaligned/offset=1-4           3.881m ± 14%   3.880m ± 15%        ~ (p=0.436 n=10)
CompareBytesBigUnaligned/offset=2-4           3.884m ±  0%   3.875m ±  0%        ~ (p=0.190 n=10)
CompareBytesBigUnaligned/offset=3-4           3.858m ±  1%   3.868m ±  1%        ~ (p=0.105 n=10)
CompareBytesBigUnaligned/offset=4-4           3.877m ±  1%   3.876m ±  0%        ~ (p=0.529 n=10)
CompareBytesBigUnaligned/offset=5-4           3.859m ±  0%   3.874m ±  0%   +0.39% (p=0.009 n=10)
CompareBytesBigUnaligned/offset=6-4           3.878m ±  1%   3.876m ±  0%        ~ (p=0.353 n=10)
CompareBytesBigUnaligned/offset=7-4           3.868m ±  1%   3.877m ±  0%        ~ (p=0.190 n=10)
CompareBytesBigBothUnaligned/offset=0-4       1.586m ±  0%   1.765m ±  0%  +11.30% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=1-4     153.132m ±  1%   1.765m ±  1%  -98.85% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=2-4     152.930m ±  1%   1.765m ±  1%  -98.85% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=3-4     152.093m ±  1%   1.769m ±  0%  -98.84% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=4-4       1.602m ±  0%   1.764m ±  0%  +10.11% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=5-4     152.314m ±  1%   1.768m ±  0%  -98.84% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=6-4     152.905m ±  1%   1.764m ±  1%  -98.85% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=7-4     152.951m ±  1%   1.804m ±  2%  -98.82% (p=0.000 n=10)
CompareBytesBig-4                             1.441m ± 21%   1.373m ± 55%        ~ (p=0.481 n=10)
CompareBytesBigIdentical-4                    19.94n ±  1%   19.10n ±  0%   -4.21% (p=0.001 n=10)
geomean                                       243.7µ         76.65µ        -68.54%

CompareBytesBigUnaligned/offset=1-4          257.7Mi ± 12%     257.7Mi ± 13%          ~ (p=0.424 n=10)
CompareBytesBigUnaligned/offset=2-4          257.5Mi ±  0%     258.1Mi ±  0%          ~ (p=0.190 n=10)
CompareBytesBigUnaligned/offset=3-4          259.2Mi ±  1%     258.5Mi ±  1%          ~ (p=0.105 n=10)
CompareBytesBigUnaligned/offset=4-4          257.9Mi ±  1%     258.0Mi ±  0%          ~ (p=0.529 n=10)
CompareBytesBigUnaligned/offset=5-4          259.1Mi ±  0%     258.1Mi ±  0%     -0.39% (p=0.008 n=10)
CompareBytesBigUnaligned/offset=6-4          257.9Mi ±  1%     258.0Mi ±  0%          ~ (p=0.353 n=10)
CompareBytesBigUnaligned/offset=7-4          258.5Mi ±  1%     257.9Mi ±  0%          ~ (p=0.190 n=10)
CompareBytesBigBothUnaligned/offset=0-4      630.6Mi ±  0%     566.6Mi ±  0%    -10.15% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=1-4      6.533Mi ±  1%   566.545Mi ±  1%  +8572.48% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=2-4      6.537Mi ±  1%   566.683Mi ±  1%  +8568.27% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=3-4      6.576Mi ±  1%   565.200Mi ±  0%  +8495.43% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=4-4      624.2Mi ±  0%     566.9Mi ±  0%     -9.18% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=5-4      6.566Mi ±  1%   565.758Mi ±  0%  +8516.41% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=6-4      6.542Mi ±  1%   567.036Mi ±  1%  +8567.35% (p=0.000 n=10)
CompareBytesBigBothUnaligned/offset=7-4      6.542Mi ±  1%   554.390Mi ±  2%  +8374.05% (p=0.000 n=10)
CompareBytesBig-4                            694.2Mi ± 18%     728.1Mi ± 35%          ~ (p=0.481 n=10)
CompareBytesBigIdentical-4                   47.83Ti ±  1%     49.92Ti ±  0%     +4.39% (p=0.002 n=10)
geomean                                      170.0Mi           813.8Mi         +378.66%

Change-Id: I0a2d0386d5ca1ffa249682a12ebd1533508e31e9
Reviewed-on: https://go-review.googlesource.com/c/go/+/497838
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: M Zhuo <mzh@golangcn.org>
Reviewed-by: Keith Randall <khr@google.com>
Run-TryBot: M Zhuo <mzh@golangcn.org>

src/bytes/compare_test.go
src/internal/bytealg/compare_riscv64.s

index a0150abd99202458685e39ac1b0a572f7cebc677..ac39f880f4c3ad866d33e2e3b6512c70555dfaae 100644 (file)
@@ -238,6 +238,32 @@ func BenchmarkCompareBytesBigUnaligned(b *testing.B) {
        }
 }
 
+func benchmarkCompareBytesBigBothUnaligned(b *testing.B, offset int) {
+       b.StopTimer()
+       pattern := []byte("Hello Gophers!")
+       b1 := make([]byte, 0, 1<<20+len(pattern))
+       for len(b1) < 1<<20 {
+               b1 = append(b1, pattern...)
+       }
+       b2 := make([]byte, len(b1))
+       copy(b2, b1)
+       b.StartTimer()
+       for j := 0; j < b.N; j++ {
+               if Compare(b1[offset:], b2[offset:]) != 0 {
+                       b.Fatal("b1 != b2")
+               }
+       }
+       b.SetBytes(int64(len(b1[offset:])))
+}
+
+func BenchmarkCompareBytesBigBothUnaligned(b *testing.B) {
+       for i := 0; i < 8; i++ {
+               b.Run(fmt.Sprintf("offset=%d", i), func(b *testing.B) {
+                       benchmarkCompareBytesBigBothUnaligned(b, i)
+               })
+       }
+}
+
 func BenchmarkCompareBytesBig(b *testing.B) {
        b.StopTimer()
        b1 := make([]byte, 0, 1<<20)
index 68cba2a37f18527497004d17491221398b1e5b8b..a4164a2b81381b75e228c590dcb9e8f7530f5a59 100644 (file)
@@ -49,6 +49,8 @@ use_a_len:
        BEQZ    X7, compare32
 
        // Check one byte at a time until we reach 8 byte alignment.
+       SUB     X7, X0, X7
+       ADD     $8, X7, X7
        SUB     X7, X5, X5
 align:
        ADD     $-1, X7
@@ -60,7 +62,7 @@ align:
        BNEZ    X7, align
 
 check32:
-       MOV     $32, X6
+       // X6 contains $32
        BLT     X5, X6, compare16
 compare32:
        MOV     0(X10), X15