goos: linux
goarch: mips64le
pkg: crypto/subtle
│ oldsubtle │ newsubtle │
│ sec/op │ sec/op vs base │
ConstantTimeByteEq-4 5.011n ± 0% 5.014n ± 0% ~ (p=0.110 n=8)
ConstantTimeEq-4 3.342n ± 0% 3.342n ± 0% ~ (p=0.993 n=8)
ConstantTimeLessOrEq-4 4.455n ± 0% 4.458n ± 0% ~ (p=0.182 n=8)
XORBytes/8Bytes-4 36.48n ± 0% 26.73n ± 0% -26.74% (p=0.000 n=8)
XORBytes/128Bytes-4 70.21n ± 0% 50.14n ± 0% -28.59% (p=0.000 n=8)
XORBytes/2048Bytes-4 566.1n ± 0% 257.2n ± 0% -54.58% (p=0.000 n=8)
XORBytes/8192Bytes-4 2123.0n ± 0% 966.8n ± 0% -54.46% (p=0.000 n=8)
XORBytes/32768Bytes-4 13.740µ ± 0% 5.614µ ± 0% -59.14% (p=0.000 n=8)
XORBytesAlignment/8Bytes0Offset-4 38.98n ± 0% 26.53n ± 0% -31.95% (p=0.000 n=8)
XORBytesAlignment/8Bytes1Offset-4 43.27n ± 0% 26.54n ± 0% -38.68% (p=0.000 n=8)
XORBytesAlignment/8Bytes2Offset-4 43.28n ± 0% 26.54n ± 0% -38.69% (p=0.000 n=8)
XORBytesAlignment/8Bytes3Offset-4 43.32n ± 0% 26.54n ± 0% -38.74% (p=0.000 n=8)
XORBytesAlignment/8Bytes4Offset-4 43.49n ± 0% 26.53n ± 0% -38.99% (p=0.000 n=8)
XORBytesAlignment/8Bytes5Offset-4 43.53n ± 0% 26.54n ± 0% -39.03% (p=0.000 n=8)
XORBytesAlignment/8Bytes6Offset-4 43.48n ± 0% 26.53n ± 0% -38.98% (p=0.000 n=8)
XORBytesAlignment/8Bytes7Offset-4 43.46n ± 1% 26.53n ± 0% -38.96% (p=0.000 n=8)
XORBytesAlignment/128Bytes0Offset-4 71.84n ± 0% 47.70n ± 1% -33.60% (p=0.000 n=8)
XORBytesAlignment/128Bytes1Offset-4 260.60n ± 0% 59.87n ± 0% -77.03% (p=0.000 n=8)
XORBytesAlignment/128Bytes2Offset-4 260.60n ± 0% 59.81n ± 0% -77.05% (p=0.000 n=8)
XORBytesAlignment/128Bytes3Offset-4 260.55n ± 0% 59.89n ± 0% -77.01% (p=0.000 n=8)
XORBytesAlignment/128Bytes4Offset-4 260.60n ± 0% 59.84n ± 0% -77.04% (p=0.000 n=8)
XORBytesAlignment/128Bytes5Offset-4 260.70n ± 0% 59.82n ± 0% -77.05% (p=0.000 n=8)
XORBytesAlignment/128Bytes6Offset-4 260.60n ± 0% 59.89n ± 0% -77.02% (p=0.000 n=8)
XORBytesAlignment/128Bytes7Offset-4 260.70n ± 0% 59.85n ± 0% -77.04% (p=0.000 n=8)
XORBytesAlignment/2048Bytes0Offset-4 552.2n ± 1% 250.0n ± 0% -54.73% (p=0.000 n=8)
XORBytesAlignment/2048Bytes1Offset-4 3603.0n ± 0% 548.6n ± 0% -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes2Offset-4 3602.0n ± 0% 548.6n ± 0% -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes3Offset-4 3604.0n ± 0% 548.6n ± 0% -84.78% (p=0.000 n=8)
XORBytesAlignment/2048Bytes4Offset-4 3603.5n ± 0% 548.9n ± 0% -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes5Offset-4 3603.0n ± 0% 548.8n ± 0% -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes6Offset-4 3602.0n ± 0% 548.6n ± 0% -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes7Offset-4 3601.5n ± 0% 548.5n ± 0% -84.77% (p=0.000 n=8)
geomean 220.0n 81.91n -62.77%
│ oldsubtle │ newsubtle │
│ B/s │ B/s vs base │
XORBytes/8Bytes-4 209.1Mi ± 0% 285.5Mi ± 0% +36.52% (p=0.000 n=8)
XORBytes/128Bytes-4 1.698Gi ± 0% 2.378Gi ± 0% +40.04% (p=0.000 n=8)
XORBytes/2048Bytes-4 3.369Gi ± 0% 7.418Gi ± 0% +120.17% (p=0.000 n=8)
XORBytes/8192Bytes-4 3.594Gi ± 0% 7.892Gi ± 0% +119.59% (p=0.000 n=8)
XORBytes/32768Bytes-4 2.221Gi ± 0% 5.436Gi ± 0% +144.76% (p=0.000 n=8)
XORBytesAlignment/8Bytes0Offset-4 195.7Mi ± 0% 287.6Mi ± 0% +46.96% (p=0.000 n=8)
XORBytesAlignment/8Bytes1Offset-4 176.3Mi ± 0% 287.5Mi ± 0% +63.06% (p=0.000 n=8)
XORBytesAlignment/8Bytes2Offset-4 176.3Mi ± 0% 287.4Mi ± 0% +63.07% (p=0.000 n=8)
XORBytesAlignment/8Bytes3Offset-4 176.1Mi ± 0% 287.5Mi ± 0% +63.25% (p=0.000 n=8)
XORBytesAlignment/8Bytes4Offset-4 175.5Mi ± 0% 287.6Mi ± 0% +63.90% (p=0.000 n=8)
XORBytesAlignment/8Bytes5Offset-4 175.3Mi ± 0% 287.5Mi ± 0% +64.02% (p=0.000 n=8)
XORBytesAlignment/8Bytes6Offset-4 175.5Mi ± 0% 287.6Mi ± 0% +63.86% (p=0.000 n=8)
XORBytesAlignment/8Bytes7Offset-4 175.5Mi ± 0% 287.6Mi ± 0% +63.85% (p=0.000 n=8)
XORBytesAlignment/128Bytes0Offset-4 1.659Gi ± 0% 2.499Gi ± 1% +50.61% (p=0.000 n=8)
XORBytesAlignment/128Bytes1Offset-4 468.4Mi ± 0% 2039.0Mi ± 0% +335.30% (p=0.000 n=8)
XORBytesAlignment/128Bytes2Offset-4 468.4Mi ± 0% 2040.9Mi ± 0% +335.73% (p=0.000 n=8)
XORBytesAlignment/128Bytes3Offset-4 468.5Mi ± 0% 2038.1Mi ± 0% +335.02% (p=0.000 n=8)
XORBytesAlignment/128Bytes4Offset-4 468.4Mi ± 0% 2040.0Mi ± 0% +335.52% (p=0.000 n=8)
XORBytesAlignment/128Bytes5Offset-4 468.2Mi ± 0% 2040.5Mi ± 0% +335.82% (p=0.000 n=8)
XORBytesAlignment/128Bytes6Offset-4 468.4Mi ± 0% 2038.2Mi ± 0% +335.13% (p=0.000 n=8)
XORBytesAlignment/128Bytes7Offset-4 468.2Mi ± 0% 2039.4Mi ± 0% +335.58% (p=0.000 n=8)
XORBytesAlignment/2048Bytes0Offset-4 3.454Gi ± 1% 7.629Gi ± 0% +120.90% (p=0.000 n=8)
XORBytesAlignment/2048Bytes1Offset-4 542.1Mi ± 0% 3560.1Mi ± 0% +556.68% (p=0.000 n=8)
XORBytesAlignment/2048Bytes2Offset-4 542.3Mi ± 0% 3560.1Mi ± 0% +556.48% (p=0.000 n=8)
XORBytesAlignment/2048Bytes3Offset-4 541.9Mi ± 0% 3560.0Mi ± 0% +556.93% (p=0.000 n=8)
XORBytesAlignment/2048Bytes4Offset-4 542.0Mi ± 0% 3558.8Mi ± 0% +556.67% (p=0.000 n=8)
XORBytesAlignment/2048Bytes5Offset-4 542.1Mi ± 3% 3558.8Mi ± 0% +556.53% (p=0.000 n=8)
XORBytesAlignment/2048Bytes6Offset-4 542.2Mi ± 0% 3560.2Mi ± 0% +556.57% (p=0.000 n=8)
XORBytesAlignment/2048Bytes7Offset-4 542.3Mi ± 0% 3560.5Mi ± 0% +556.56% (p=0.000 n=8)
geomean 514.9Mi 1.496Gi +197.56%
Change-Id: I649fa6bfca31296d65cccdf5fceb3dcfa0c588a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/666255
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (amd64 || arm64 || loong64 || ppc64 || ppc64le || riscv64) && !purego
+//go:build (amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64) && !purego
package subtle
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (!amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64) || purego
+//go:build (!amd64 && !arm64 && !loong64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !riscv64) || purego
package subtle
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build (mips64 || mips64le) && !purego
+
+#include "textflag.h"
+
+// func xorBytes(dst, a, b *byte, n int)
+TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
+ MOVV dst+0(FP), R1
+ MOVV a+8(FP), R2
+ MOVV b+16(FP), R3
+ MOVV n+24(FP), R4
+
+xor_64_check:
+ SGTU $64, R4, R5 // R5 = 1 if (64 > R4)
+ BNE R5, xor_32_check
+xor_64:
+ MOVV (R2), R6
+ MOVV 8(R2), R7
+ MOVV 16(R2), R8
+ MOVV 24(R2), R9
+ MOVV (R3), R10
+ MOVV 8(R3), R11
+ MOVV 16(R3), R12
+ MOVV 24(R3), R13
+ XOR R6, R10
+ XOR R7, R11
+ XOR R8, R12
+ XOR R9, R13
+ MOVV R10, (R1)
+ MOVV R11, 8(R1)
+ MOVV R12, 16(R1)
+ MOVV R13, 24(R1)
+ MOVV 32(R2), R6
+ MOVV 40(R2), R7
+ MOVV 48(R2), R8
+ MOVV 56(R2), R9
+ MOVV 32(R3), R10
+ MOVV 40(R3), R11
+ MOVV 48(R3), R12
+ MOVV 56(R3), R13
+ XOR R6, R10
+ XOR R7, R11
+ XOR R8, R12
+ XOR R9, R13
+ MOVV R10, 32(R1)
+ MOVV R11, 40(R1)
+ MOVV R12, 48(R1)
+ MOVV R13, 56(R1)
+ ADDV $64, R2
+ ADDV $64, R3
+ ADDV $64, R1
+ SUBV $64, R4
+ SGTU $64, R4, R5
+ BEQ R0, R5, xor_64
+ BEQ R0, R4, end
+
+xor_32_check:
+ SGTU $32, R4, R5
+ BNE R5, xor_16_check
+xor_32:
+ MOVV (R2), R6
+ MOVV 8(R2), R7
+ MOVV 16(R2), R8
+ MOVV 24(R2), R9
+ MOVV (R3), R10
+ MOVV 8(R3), R11
+ MOVV 16(R3), R12
+ MOVV 24(R3), R13
+ XOR R6, R10
+ XOR R7, R11
+ XOR R8, R12
+ XOR R9, R13
+ MOVV R10, (R1)
+ MOVV R11, 8(R1)
+ MOVV R12, 16(R1)
+ MOVV R13, 24(R1)
+ ADDV $32, R2
+ ADDV $32, R3
+ ADDV $32, R1
+ SUBV $32, R4
+ BEQ R0, R4, end
+
+xor_16_check:
+ SGTU $16, R4, R5
+ BNE R5, xor_8_check
+xor_16:
+ MOVV (R2), R6
+ MOVV 8(R2), R7
+ MOVV (R3), R8
+ MOVV 8(R3), R9
+ XOR R6, R8
+ XOR R7, R9
+ MOVV R8, (R1)
+ MOVV R9, 8(R1)
+ ADDV $16, R2
+ ADDV $16, R3
+ ADDV $16, R1
+ SUBV $16, R4
+ BEQ R0, R4, end
+
+xor_8_check:
+ SGTU $8, R4, R5
+ BNE R5, xor_4_check
+xor_8:
+ MOVV (R2), R6
+ MOVV (R3), R7
+ XOR R6, R7
+ MOVV R7, (R1)
+ ADDV $8, R1
+ ADDV $8, R2
+ ADDV $8, R3
+ SUBV $8, R4
+ BEQ R0, R4, end
+
+xor_4_check:
+ SGTU $4, R4, R5
+ BNE R5, xor_2_check
+xor_4:
+ MOVW (R2), R6
+ MOVW (R3), R7
+ XOR R6, R7
+ MOVW R7, (R1)
+ ADDV $4, R2
+ ADDV $4, R3
+ ADDV $4, R1
+ SUBV $4, R4
+ BEQ R0, R4, end
+
+xor_2_check:
+ SGTU $2, R4, R5
+ BNE R5, xor_1
+xor_2:
+ MOVH (R2), R6
+ MOVH (R3), R7
+ XOR R6, R7
+ MOVH R7, (R1)
+ ADDV $2, R2
+ ADDV $2, R3
+ ADDV $2, R1
+ SUBV $2, R4
+ BEQ R0, R4, end
+
+xor_1:
+ MOVB (R2), R6
+ MOVB (R3), R7
+ XOR R6, R7
+ MOVB R7, (R1)
+
+end:
+ RET