--- /dev/null
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// LoongArch64-specific hardware-assisted CRC32 algorithms. See crc32.go for a
+// description of the interface that each architecture-specific file
+// implements.
+
+package crc32
+
+import "internal/cpu"
+
+// castagnoliUpdate updates the non-inverted crc with the bytes of p using the
+// hardware CRC32-C (Castagnoli) instructions and returns the new non-inverted
+// value. It has no Go body; the implementation is in assembly. The callers in
+// this file only reach it after checking cpu.Loong64.HasCRC32.
+func castagnoliUpdate(crc uint32, p []byte) uint32
+
+// ieeeUpdate updates the non-inverted crc with the bytes of p using the
+// hardware CRC32 (IEEE) instructions and returns the new non-inverted value.
+// It has no Go body; the implementation is in assembly. The callers in this
+// file only reach it after checking cpu.Loong64.HasCRC32.
+func ieeeUpdate(crc uint32, p []byte) uint32
+
+// archAvailableCastagnoli reports whether the hardware-assisted Castagnoli
+// implementation can be used, i.e. whether the CPU feature flag
+// cpu.Loong64.HasCRC32 is set.
+func archAvailableCastagnoli() bool {
+	hasCRC32 := cpu.Loong64.HasCRC32
+	return hasCRC32
+}
+
+// archInitCastagnoli performs the one-time setup for the hardware Castagnoli
+// path. No tables are needed; it only verifies that the CRC32 feature is
+// present and panics otherwise, since being called without it is a bug in
+// the caller.
+func archInitCastagnoli() {
+	if cpu.Loong64.HasCRC32 {
+		return
+	}
+	panic("arch-specific crc32 instruction for Castagnoli not available")
+}
+
+// archUpdateCastagnoli returns crc updated with the bytes of p, computed with
+// the hardware Castagnoli routine. It panics if the CPU lacks the CRC32
+// feature.
+//
+// castagnoliUpdate works on the non-inverted CRC, so the value is
+// complemented on the way in and again on the way out.
+func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
+	if supported := cpu.Loong64.HasCRC32; !supported {
+		panic("arch-specific crc32 instruction for Castagnoli not available")
+	}
+
+	raw := castagnoliUpdate(^crc, p)
+	return ^raw
+}
+
+// archAvailableIEEE reports whether the hardware-assisted IEEE implementation
+// can be used, i.e. whether the CPU feature flag cpu.Loong64.HasCRC32 is set.
+func archAvailableIEEE() bool {
+	hasCRC32 := cpu.Loong64.HasCRC32
+	return hasCRC32
+}
+
+// archInitIEEE performs the one-time setup for the hardware IEEE path. No
+// tables are needed; it only verifies that the CRC32 feature is present and
+// panics otherwise, since being called without it is a bug in the caller.
+func archInitIEEE() {
+	if cpu.Loong64.HasCRC32 {
+		return
+	}
+	panic("arch-specific crc32 instruction for IEEE not available")
+}
+
+// archUpdateIEEE returns crc updated with the bytes of p, computed with the
+// hardware IEEE routine. It panics if the CPU lacks the CRC32 feature.
+//
+// ieeeUpdate works on the non-inverted CRC, so the value is complemented on
+// the way in and again on the way out.
+func archUpdateIEEE(crc uint32, p []byte) uint32 {
+	if supported := cpu.Loong64.HasCRC32; !supported {
+		panic("arch-specific crc32 instruction for IEEE not available")
+	}
+
+	raw := ieeeUpdate(^crc, p)
+	return ^raw
+}
--- /dev/null
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// castagnoliUpdate updates the non-inverted crc with the given data, using
+// the CRCC* (Castagnoli polynomial) instructions.
+//
+// The data is consumed in three phases:
+//  1. head: when len(p) >= 8 and the pointer is not 8-byte aligned, 1, 2
+//     and/or 4 bytes are processed to reach 8-byte alignment;
+//  2. body: 8 bytes per iteration while at least 8 bytes remain;
+//  3. tail: the remaining (< 8) bytes as an optional 4, 2 and 1 byte step.
+//
+// Inputs shorter than 8 bytes skip phases 1 and 2 entirely, so no alignment
+// is attempted for them.
+//
+// Register use: R4 = running CRC, R5 = data pointer, R6 = bytes remaining,
+// R12/R13 = scratch.
+
+// func castagnoliUpdate(crc uint32, p []byte) uint32
+TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
+ MOVWU crc+0(FP), R4 // a0 = CRC value
+ MOVV p+8(FP), R5 // a1 = data pointer
+ MOVV p_len+16(FP), R6 // a2 = len(p)
+
+ // Fewer than 8 bytes in total: go straight to the tail handling.
+ SGT $8, R6, R12
+ BNE R12, less_than_8
+ // t0 = pointer & 7; zero means the pointer is already 8-byte aligned.
+ AND $7, R5, R12
+ BEQ R12, aligned
+
+ // Process the first few bytes to 8-byte align the input.
+ // t0 = 8 - t0. We need to process this many bytes to align.
+ // (For t0 in 1..7, (t0 - 1) ^ 7 equals 8 - t0.)
+ SUB $1, R12
+ XOR $7, R12
+
+ // Bit 0 of the alignment count set: consume one byte.
+ AND $1, R12, R13
+ BEQ R13, align_2
+ MOVB (R5), R13
+ CRCCWBW R4, R13, R4
+ ADDV $1, R5
+ ADDV $-1, R6
+
+align_2:
+ // Bit 1 of the alignment count set: consume a halfword (2 bytes).
+ AND $2, R12, R13
+ BEQ R13, align_4
+ MOVH (R5), R13
+ CRCCWHW R4, R13, R4
+ ADDV $2, R5
+ ADDV $-2, R6
+
+align_4:
+ // Bit 2 of the alignment count set: consume a word (4 bytes).
+ AND $4, R12, R13
+ BEQ R13, aligned
+ MOVW (R5), R13
+ CRCCWWW R4, R13, R4
+ ADDV $4, R5
+ ADDV $-4, R6
+
+aligned:
+ // The input is now 8-byte aligned and we can process 8-byte chunks.
+ // The length is re-checked on every iteration because the alignment
+ // steps above may have left fewer than 8 bytes.
+ SGT $8, R6, R12
+ BNE R12, less_than_8
+ MOVV (R5), R13
+ CRCCWVW R4, R13, R4
+ ADDV $8, R5
+ ADDV $-8, R6
+ JMP aligned
+
+less_than_8:
+ // We may have some bytes left over; process 4 bytes, then 2, then 1.
+ // R6 < 8 here, so its bits 2, 1 and 0 select the 4-, 2- and 1-byte steps.
+ AND $4, R6, R12
+ BEQ R12, less_than_4
+ MOVW (R5), R13
+ CRCCWWW R4, R13, R4
+ ADDV $4, R5
+ ADDV $-4, R6
+
+less_than_4:
+ AND $2, R6, R12
+ BEQ R12, less_than_2
+ MOVH (R5), R13
+ CRCCWHW R4, R13, R4
+ ADDV $2, R5
+ ADDV $-2, R6
+
+less_than_2:
+ // The 4- and 2-byte steps have been subtracted, so R6 is now 0 or 1.
+ BEQ R6, done
+ MOVB (R5), R13
+ CRCCWBW R4, R13, R4
+
+done:
+ // Store the updated (still non-inverted) CRC as the result.
+ MOVW R4, ret+32(FP)
+ RET
+
+// ieeeUpdate updates the non-inverted crc with the given data, using the
+// CRC* (IEEE polynomial) instructions.
+//
+// The data is consumed in three phases:
+//  1. head: when len(p) >= 8 and the pointer is not 8-byte aligned, 1, 2
+//     and/or 4 bytes are processed to reach 8-byte alignment;
+//  2. body: 8 bytes per iteration while at least 8 bytes remain;
+//  3. tail: the remaining (< 8) bytes as an optional 4, 2 and 1 byte step.
+//
+// Inputs shorter than 8 bytes skip phases 1 and 2 entirely, so no alignment
+// is attempted for them.
+//
+// Register use: R4 = running CRC, R5 = data pointer, R6 = bytes remaining,
+// R12/R13 = scratch.
+
+// func ieeeUpdate(crc uint32, p []byte) uint32
+TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36
+ MOVWU crc+0(FP), R4 // a0 = CRC value
+ MOVV p+8(FP), R5 // a1 = data pointer
+ MOVV p_len+16(FP), R6 // a2 = len(p)
+
+ // Fewer than 8 bytes in total: go straight to the tail handling.
+ SGT $8, R6, R12
+ BNE R12, less_than_8
+ // t0 = pointer & 7; zero means the pointer is already 8-byte aligned.
+ AND $7, R5, R12
+ BEQ R12, aligned
+
+ // Process the first few bytes to 8-byte align the input.
+ // t0 = 8 - t0. We need to process this many bytes to align.
+ // (For t0 in 1..7, (t0 - 1) ^ 7 equals 8 - t0.)
+ SUB $1, R12
+ XOR $7, R12
+
+ // Bit 0 of the alignment count set: consume one byte.
+ AND $1, R12, R13
+ BEQ R13, align_2
+ MOVB (R5), R13
+ CRCWBW R4, R13, R4
+ ADDV $1, R5
+ ADDV $-1, R6
+
+align_2:
+ // Bit 1 of the alignment count set: consume a halfword (2 bytes).
+ AND $2, R12, R13
+ BEQ R13, align_4
+ MOVH (R5), R13
+ CRCWHW R4, R13, R4
+ ADDV $2, R5
+ ADDV $-2, R6
+
+align_4:
+ // Bit 2 of the alignment count set: consume a word (4 bytes).
+ AND $4, R12, R13
+ BEQ R13, aligned
+ MOVW (R5), R13
+ CRCWWW R4, R13, R4
+ ADDV $4, R5
+ ADDV $-4, R6
+
+aligned:
+ // The input is now 8-byte aligned and we can process 8-byte chunks.
+ // The length is re-checked on every iteration because the alignment
+ // steps above may have left fewer than 8 bytes.
+ SGT $8, R6, R12
+ BNE R12, less_than_8
+ MOVV (R5), R13
+ CRCWVW R4, R13, R4
+ ADDV $8, R5
+ ADDV $-8, R6
+ JMP aligned
+
+less_than_8:
+ // We may have some bytes left over; process 4 bytes, then 2, then 1.
+ // R6 < 8 here, so its bits 2, 1 and 0 select the 4-, 2- and 1-byte steps.
+ AND $4, R6, R12
+ BEQ R12, less_than_4
+ MOVW (R5), R13
+ CRCWWW R4, R13, R4
+ ADDV $4, R5
+ ADDV $-4, R6
+
+less_than_4:
+ AND $2, R6, R12
+ BEQ R12, less_than_2
+ MOVH (R5), R13
+ CRCWHW R4, R13, R4
+ ADDV $2, R5
+ ADDV $-2, R6
+
+less_than_2:
+ // The 4- and 2-byte steps have been subtracted, so R6 is now 0 or 1.
+ BEQ R6, done
+ MOVB (R5), R13
+ CRCWBW R4, R13, R4
+
+done:
+ // Store the updated (still non-inverted) CRC as the result.
+ MOVW R4, ret+32(FP)
+ RET
+