// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (amd64 || arm64 || loong64 || ppc64 || ppc64le) && !purego
+//go:build (amd64 || arm64 || loong64 || ppc64 || ppc64le || riscv64) && !purego
package subtle
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (!amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le) || purego
+//go:build (!amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64) || purego
package subtle
--- /dev/null
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !purego
+
+#include "textflag.h"
+
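+// xorBytes XORs the bytes of a and b, writing the result to dst,
+// i.e. dst[i] = a[i] ^ b[i] for 0 <= i < n.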
+// func xorBytes(dst, a, b *byte, n int)
+TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
+ MOV dst+0(FP), X10
+ MOV a+8(FP), X11
+ MOV b+16(FP), X12
+ MOV n+24(FP), X13
+
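+ // Jump straight to the byte loops if we have fewer than 32 bytes.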
+ MOV $32, X15
+ BLT X13, X15, loop4_check
+
+ // Check alignment - if the pointer alignments differ we have to XOR one byte at a time.
+ AND $7, X10, X5
+ AND $7, X11, X6
+ AND $7, X12, X7
+ BNE X5, X6, loop4_check
+ BNE X5, X7, loop4_check
+ BEQZ X5, loop64_check
+
+ // XOR one byte at a time until we reach 8 byte alignment.
+ MOV $8, X8
+ SUB X5, X8
+ SUB X8, X13
+align:
+ MOVBU 0(X11), X16
+ MOVBU 0(X12), X17
+ XOR X16, X17
+ MOVB X17, 0(X10)
+ ADD $1, X10
+ ADD $1, X11
+ ADD $1, X12
+ SUB $1, X8
+ BNEZ X8, align
+
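+ // XOR 64 bytes per loop iteration while at least 64 bytes remain.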
+loop64_check:
+ MOV $64, X15
+ BLT X13, X15, tail32_check
+ PCALIGN $16
+loop64:
+ MOV 0(X11), X16
+ MOV 0(X12), X17
+ MOV 8(X11), X18
+ MOV 8(X12), X19
+ XOR X16, X17
+ XOR X18, X19
+ MOV X17, 0(X10)
+ MOV X19, 8(X10)
+ MOV 16(X11), X20
+ MOV 16(X12), X21
+ MOV 24(X11), X22
+ MOV 24(X12), X23
+ XOR X20, X21
+ XOR X22, X23
+ MOV X21, 16(X10)
+ MOV X23, 24(X10)
+ MOV 32(X11), X16
+ MOV 32(X12), X17
+ MOV 40(X11), X18
+ MOV 40(X12), X19
+ XOR X16, X17
+ XOR X18, X19
+ MOV X17, 32(X10)
+ MOV X19, 40(X10)
+ MOV 48(X11), X20
+ MOV 48(X12), X21
+ MOV 56(X11), X22
+ MOV 56(X12), X23
+ XOR X20, X21
+ XOR X22, X23
+ MOV X21, 48(X10)
+ MOV X23, 56(X10)
+ ADD $64, X10
+ ADD $64, X11
+ ADD $64, X12
+ SUB $64, X13
+ BGE X13, X15, loop64
+ BEQZ X13, done
+
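+ // XOR a single 32 byte block if at least 32 bytes remain.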
+tail32_check:
+ MOV $32, X15
+ BLT X13, X15, tail16_check
+ MOV 0(X11), X16
+ MOV 0(X12), X17
+ MOV 8(X11), X18
+ MOV 8(X12), X19
+ XOR X16, X17
+ XOR X18, X19
+ MOV X17, 0(X10)
+ MOV X19, 8(X10)
+ MOV 16(X11), X20
+ MOV 16(X12), X21
+ MOV 24(X11), X22
+ MOV 24(X12), X23
+ XOR X20, X21
+ XOR X22, X23
+ MOV X21, 16(X10)
+ MOV X23, 24(X10)
+ ADD $32, X10
+ ADD $32, X11
+ ADD $32, X12
+ SUB $32, X13
+ BEQZ X13, done
+
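+ // XOR a single 16 byte block if at least 16 bytes remain.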
+tail16_check:
+ MOV $16, X15
+ BLT X13, X15, loop4_check
+ MOV 0(X11), X16
+ MOV 0(X12), X17
+ MOV 8(X11), X18
+ MOV 8(X12), X19
+ XOR X16, X17
+ XOR X18, X19
+ MOV X17, 0(X10)
+ MOV X19, 8(X10)
+ ADD $16, X10
+ ADD $16, X11
+ ADD $16, X12
+ SUB $16, X13
+ BEQZ X13, done
+
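+ // XOR 4 bytes per loop iteration while at least 4 bytes remain.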
+loop4_check:
+ MOV $4, X15
+ BLT X13, X15, loop1
+ PCALIGN $16
+loop4:
+ MOVBU 0(X11), X16
+ MOVBU 0(X12), X17
+ MOVBU 1(X11), X18
+ MOVBU 1(X12), X19
+ XOR X16, X17
+ XOR X18, X19
+ MOVB X17, 0(X10)
+ MOVB X19, 1(X10)
+ MOVBU 2(X11), X20
+ MOVBU 2(X12), X21
+ MOVBU 3(X11), X22
+ MOVBU 3(X12), X23
+ XOR X20, X21
+ XOR X22, X23
+ MOVB X21, 2(X10)
+ MOVB X23, 3(X10)
+ ADD $4, X10
+ ADD $4, X11
+ ADD $4, X12
+ SUB $4, X13
+ BGE X13, X15, loop4
+
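+ // XOR any remaining bytes one at a time.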
+ PCALIGN $16
+loop1:
+ BEQZ X13, done
+ MOVBU 0(X11), X16
+ MOVBU 0(X12), X17
+ XOR X16, X17
+ MOVB X17, 0(X10)
+ ADD $1, X10
+ ADD $1, X11
+ ADD $1, X12
+ SUB $1, X13
+ JMP loop1
+
+done:
+ RET