runtime/asm_386.s: [386] ldt0setup: function ldt0setup missing Go declaration
runtime/asm_386.s: [386] emptyfunc: function emptyfunc missing Go declaration
runtime/asm_386.s: [386] aeshashbody: function aeshashbody missing Go declaration
-runtime/asm_386.s: [386] memeqbody: function memeqbody missing Go declaration
runtime/asm_386.s: [386] cmpbody: function cmpbody missing Go declaration
runtime/asm_386.s: [386] addmoduledata: function addmoduledata missing Go declaration
runtime/duff_386.s: [386] duffzero: function duffzero missing Go declaration
// Nothing much to do about cross-package assembly. Unfortunate.
runtime/asm_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: call is in package reflect
-runtime/asm_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: Equal is in package bytes
+internal/bytealg/equal_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: Equal is in package bytes
+internal/bytealg/equal_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: memequal is in package runtime
+internal/bytealg/equal_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: memequal_varlen is in package runtime
internal/bytealg/indexbyte_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: IndexByte is in package bytes
internal/bytealg/indexbyte_ARCHSUFF.s: [GOARCH] cannot check cross-package assembly function: IndexByte is in package strings
// Others use the platform ABI.
// There is no sensible corresponding Go prototype.
runtime/asm_amd64.s: [amd64] aeshashbody: function aeshashbody missing Go declaration
-runtime/asm_amd64.s: [amd64] memeqbody: function memeqbody missing Go declaration
runtime/asm_amd64.s: [amd64] cmpbody: function cmpbody missing Go declaration
runtime/asm_amd64.s: [amd64] addmoduledata: function addmoduledata missing Go declaration
runtime/duff_amd64.s: [amd64] duffzero: function duffzero missing Go declaration
runtime/asm_amd64p32.s: [amd64p32] rt0_go: unknown variable argc
runtime/asm_amd64p32.s: [amd64p32] rt0_go: unknown variable argv
-runtime/asm_amd64p32.s: [amd64p32] memeqbody: function memeqbody missing Go declaration
runtime/asm_amd64p32.s: [amd64p32] cannot check cross-package assembly function: Compare is in package bytes
runtime/asm_amd64p32.s: [amd64p32] cmpbody: function cmpbody missing Go declaration
runtime/asm_amd64p32.s: [amd64p32] asmcgocall: RET without writing to 4-byte ret+8(FP)
runtime/asm_ppc64x.s: [GOARCH] reginit: function reginit missing Go declaration
runtime/asm_ppc64x.s: [GOARCH] abort: function abort missing Go declaration
-runtime/asm_ppc64x.s: [GOARCH] memeqbody: function memeqbody missing Go declaration
runtime/asm_ppc64x.s: [GOARCH] goexit: use of 24(R1) points beyond argument frame
runtime/asm_ppc64x.s: [GOARCH] addmoduledata: function addmoduledata missing Go declaration
runtime/duff_ppc64x.s: [GOARCH] duffzero: function duffzero missing Go declaration
runtime/asm_s390x.s: [s390x] abort: function abort missing Go declaration
-runtime/asm_s390x.s: [s390x] memeqbody: function memeqbody missing Go declaration
-runtime/asm_s390x.s: [s390x] memeqbodyclc: function memeqbodyclc missing Go declaration
runtime/asm_s390x.s: [s390x] cannot check cross-package assembly function: Compare is in package bytes
runtime/asm_s390x.s: [s390x] cmpbody: function cmpbody missing Go declaration
runtime/asm_s390x.s: [s390x] cmpbodyclc: function cmpbodyclc missing Go declaration
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Equal(SB),NOSPLIT,$0-25 // func Equal(a, b []byte) bool (386)
+ MOVL a_len+4(FP), BX // BX = len(a); also the byte count handed to memeqbody
+ MOVL b_len+16(FP), CX // CX = len(b)
+ CMPL BX, CX
+ JNE neq // different lengths: not equal
+ MOVL a_base+0(FP), SI
+ MOVL b_base+12(FP), DI
+ CMPL SI, DI
+ JEQ eq // same base pointer and same length: equal without reading memory
+ LEAL ret+24(FP), AX // memeqbody writes the result byte through AX
+ JMP memeqbody<>(SB) // tail call: memeqbody's RET returns to our caller
+neq:
+ MOVB $0, ret+24(FP)
+ RET
+eq:
+ MOVB $1, ret+24(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-25 // symbol in package bytes; same logic as ·Equal in this file
+ MOVL a_len+4(FP), BX // BX = len(a); also the byte count handed to memeqbody
+ MOVL b_len+16(FP), CX // CX = len(b)
+ CMPL BX, CX
+ JNE neq // different lengths: not equal
+ MOVL a_base+0(FP), SI
+ MOVL b_base+12(FP), DI
+ CMPL SI, DI
+ JEQ eq // same base pointer and same length: equal without reading memory
+ LEAL ret+24(FP), AX // memeqbody writes the result byte through AX
+ JMP memeqbody<>(SB) // tail call: memeqbody's RET returns to our caller
+neq:
+ MOVB $0, ret+24(FP)
+ RET
+eq:
+ MOVB $1, ret+24(FP)
+ RET
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-13
+ MOVL a+0(FP), SI
+ MOVL b+4(FP), DI
+ CMPL SI, DI
+ JEQ eq // identical pointers: equal regardless of size
+ MOVL size+8(FP), BX // BX = byte count for memeqbody
+ LEAL ret+12(FP), AX // AX = address of the result byte
+ JMP memeqbody<>(SB) // tail call
+eq:
+ MOVB $1, ret+12(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
+ MOVL a+0(FP), SI
+ MOVL b+4(FP), DI
+ CMPL SI, DI
+ JEQ eq // identical pointers: equal, size never needs to be read
+ MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
+ LEAL ret+8(FP), AX // AX = address of the result byte
+ JMP memeqbody<>(SB) // tail call
+eq:
+ MOVB $1, ret+8(FP)
+ RET
+
+// a in SI
+// b in DI
+// count in BX
+// address of result byte in AX (1 is stored there if the BX bytes at SI and DI match, else 0)
+TEXT memeqbody<>(SB),NOSPLIT,$0-0
+ CMPL BX, $4
+ JB small // fewer than 4 bytes: handled with a single masked 32-bit compare
+
+ // 64 bytes at a time using xmm registers
+hugeloop:
+ CMPL BX, $64
+ JB bigloop
+ CMPB internal∕cpu·X86+const_x86_HasSSE2(SB), $1
+ JNE bigloop // no SSE2: fall back to the 4-byte loop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ MOVOU 16(SI), X2
+ MOVOU 16(DI), X3
+ MOVOU 32(SI), X4
+ MOVOU 32(DI), X5
+ MOVOU 48(SI), X6
+ MOVOU 48(DI), X7
+ PCMPEQB X1, X0 // per-byte equality masks for each 16-byte pair
+ PCMPEQB X3, X2
+ PCMPEQB X5, X4
+ PCMPEQB X7, X6
+ PAND X2, X0 // fold the four masks into X0
+ PAND X6, X4
+ PAND X4, X0
+ PMOVMSKB X0, DX // DX = one bit per byte lane
+ ADDL $64, SI
+ ADDL $64, DI
+ SUBL $64, BX
+ CMPL DX, $0xffff // all 16 bits set means all 64 bytes matched
+ JEQ hugeloop
+ MOVB $0, (AX)
+ RET
+
+ // 4 bytes at a time using 32-bit register
+bigloop:
+ CMPL BX, $4
+ JBE leftover
+ MOVL (SI), CX
+ MOVL (DI), DX
+ ADDL $4, SI
+ ADDL $4, DI
+ SUBL $4, BX
+ CMPL CX, DX
+ JEQ bigloop
+ MOVB $0, (AX)
+ RET
+
+ // remaining 0-4 bytes
+leftover:
+ MOVL -4(SI)(BX*1), CX // the 4 bytes ending at SI+BX; may overlap bytes already compared
+ MOVL -4(DI)(BX*1), DX
+ CMPL CX, DX
+ SETEQ (AX)
+ RET
+
+small:
+ CMPL BX, $0
+ JEQ equal // zero length: ZF is set, so SETEQ at equal stores 1
+
+ LEAL 0(BX*8), CX // CX = number of bits to compare
+ NEGL CX // CX = -bits; used below as a shift count
+
+ MOVL SI, DX
+ CMPB DX, $0xfc // examine the low address byte only
+ JA si_high
+
+ // load at SI won't cross a page boundary.
+ MOVL (SI), SI
+ JMP si_finish
+si_high:
+ // address ends in 111111xx. Load up to bytes we want, move to correct position.
+ MOVL -4(SI)(BX*1), SI
+ SHRL CX, SI
+si_finish:
+
+ // same for DI.
+ MOVL DI, DX
+ CMPB DX, $0xfc
+ JA di_high
+ MOVL (DI), DI
+ JMP di_finish
+di_high:
+ MOVL -4(DI)(BX*1), DI
+ SHRL CX, DI
+di_finish:
+
+ SUBL SI, DI // difference of the two loaded words
+ SHLL CX, DI // shift out the bytes beyond count; ZF is set iff the compared bytes match
+equal:
+ SETEQ (AX)
+ RET
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Equal(SB),NOSPLIT,$0-49 // func Equal(a, b []byte) bool (amd64)
+ MOVQ a_len+8(FP), BX // BX = len(a); also the byte count handed to memeqbody
+ MOVQ b_len+32(FP), CX // CX = len(b)
+ CMPQ BX, CX
+ JNE neq // different lengths: not equal
+ MOVQ a_base+0(FP), SI
+ MOVQ b_base+24(FP), DI
+ CMPQ SI, DI
+ JEQ eq // same base pointer and same length: equal without reading memory
+ LEAQ ret+48(FP), AX // memeqbody writes the result byte through AX
+ JMP memeqbody<>(SB) // tail call: memeqbody's RET returns to our caller
+neq:
+ MOVB $0, ret+48(FP)
+ RET
+eq:
+ MOVB $1, ret+48(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-49 // symbol in package bytes; same logic as ·Equal in this file
+ MOVQ a_len+8(FP), BX // BX = len(a); also the byte count handed to memeqbody
+ MOVQ b_len+32(FP), CX // CX = len(b)
+ CMPQ BX, CX
+ JNE neq // different lengths: not equal
+ MOVQ a_base+0(FP), SI
+ MOVQ b_base+24(FP), DI
+ CMPQ SI, DI
+ JEQ eq // same base pointer and same length: equal without reading memory
+ LEAQ ret+48(FP), AX // memeqbody writes the result byte through AX
+ JMP memeqbody<>(SB) // tail call: memeqbody's RET returns to our caller
+neq:
+ MOVB $0, ret+48(FP)
+ RET
+eq:
+ MOVB $1, ret+48(FP)
+ RET
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-25
+ MOVQ a+0(FP), SI
+ MOVQ b+8(FP), DI
+ CMPQ SI, DI
+ JEQ eq // identical pointers: equal regardless of size
+ MOVQ size+16(FP), BX // BX = byte count for memeqbody
+ LEAQ ret+24(FP), AX // AX = address of the result byte
+ JMP memeqbody<>(SB) // tail call
+eq:
+ MOVB $1, ret+24(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
+ MOVQ a+0(FP), SI
+ MOVQ b+8(FP), DI
+ CMPQ SI, DI
+ JEQ eq // identical pointers: equal, size never needs to be read
+ MOVQ 8(DX), BX // compiler stores size at offset 8 in the closure
+ LEAQ ret+16(FP), AX // AX = address of the result byte
+ JMP memeqbody<>(SB) // tail call
+eq:
+ MOVB $1, ret+16(FP)
+ RET
+
+// a in SI
+// b in DI
+// count in BX
+// address of result byte in AX (1 is stored there if the BX bytes at SI and DI match, else 0)
+TEXT memeqbody<>(SB),NOSPLIT,$0-0
+ CMPQ BX, $8
+ JB small // fewer than 8 bytes: handled with a single masked 64-bit compare
+ CMPQ BX, $64
+ JB bigloop
+ CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1
+ JE hugeloop_avx2 // AVX2 available: use the ymm loop instead of the xmm loop
+
+ // 64 bytes at a time using xmm registers
+hugeloop:
+ CMPQ BX, $64
+ JB bigloop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ MOVOU 16(SI), X2
+ MOVOU 16(DI), X3
+ MOVOU 32(SI), X4
+ MOVOU 32(DI), X5
+ MOVOU 48(SI), X6
+ MOVOU 48(DI), X7
+ PCMPEQB X1, X0 // per-byte equality masks for each 16-byte pair
+ PCMPEQB X3, X2
+ PCMPEQB X5, X4
+ PCMPEQB X7, X6
+ PAND X2, X0 // fold the four masks into X0
+ PAND X6, X4
+ PAND X4, X0
+ PMOVMSKB X0, DX // DX = one bit per byte lane
+ ADDQ $64, SI
+ ADDQ $64, DI
+ SUBQ $64, BX
+ CMPL DX, $0xffff // all 16 bits set means all 64 bytes matched
+ JEQ hugeloop
+ MOVB $0, (AX)
+ RET
+
+ // 64 bytes at a time using ymm registers
+hugeloop_avx2:
+ CMPQ BX, $64
+ JB bigloop_avx2
+ VMOVDQU (SI), Y0
+ VMOVDQU (DI), Y1
+ VMOVDQU 32(SI), Y2
+ VMOVDQU 32(DI), Y3
+ VPCMPEQB Y1, Y0, Y4 // per-byte equality mask, first 32 bytes
+ VPCMPEQB Y2, Y3, Y5 // per-byte equality mask, second 32 bytes
+ VPAND Y4, Y5, Y6
+ VPMOVMSKB Y6, DX // DX = one bit per byte lane
+ ADDQ $64, SI
+ ADDQ $64, DI
+ SUBQ $64, BX
+ CMPL DX, $0xffffffff // all 32 bits set means all 64 bytes matched
+ JEQ hugeloop_avx2
+ VZEROUPPER // clear upper ymm state before returning
+ MOVB $0, (AX)
+ RET
+
+bigloop_avx2:
+ VZEROUPPER // clear upper ymm state, then fall through to the scalar loop
+
+ // 8 bytes at a time using 64-bit register
+bigloop:
+ CMPQ BX, $8
+ JBE leftover
+ MOVQ (SI), CX
+ MOVQ (DI), DX
+ ADDQ $8, SI
+ ADDQ $8, DI
+ SUBQ $8, BX
+ CMPQ CX, DX
+ JEQ bigloop
+ MOVB $0, (AX)
+ RET
+
+ // remaining 0-8 bytes
+leftover:
+ MOVQ -8(SI)(BX*1), CX // the 8 bytes ending at SI+BX; may overlap bytes already compared
+ MOVQ -8(DI)(BX*1), DX
+ CMPQ CX, DX
+ SETEQ (AX)
+ RET
+
+small:
+ CMPQ BX, $0
+ JEQ equal // zero length: ZF is set, so SETEQ at equal stores 1
+
+ LEAQ 0(BX*8), CX // CX = number of bits to compare
+ NEGQ CX // CX = -bits; used below as a shift count
+
+ CMPB SI, $0xf8 // examine the low address byte only
+ JA si_high
+
+ // load at SI won't cross a page boundary.
+ MOVQ (SI), SI
+ JMP si_finish
+si_high:
+ // address ends in 11111xxx. Load up to bytes we want, move to correct position.
+ MOVQ -8(SI)(BX*1), SI
+ SHRQ CX, SI
+si_finish:
+
+ // same for DI.
+ CMPB DI, $0xf8
+ JA di_high
+ MOVQ (DI), DI
+ JMP di_finish
+di_high:
+ MOVQ -8(DI)(BX*1), DI
+ SHRQ CX, DI
+di_finish:
+
+ SUBQ SI, DI // difference of the two loaded words
+ SHLQ CX, DI // shift out the bytes beyond count; ZF is set iff the compared bytes match
+equal:
+ SETEQ (AX)
+ RET
+
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Equal(SB),NOSPLIT,$0-25 // func Equal(a, b []byte) bool (amd64p32)
+ MOVL a_len+4(FP), BX // BX = len(a); also the byte count handed to memeqbody
+ MOVL b_len+16(FP), CX // CX = len(b)
+ CMPL BX, CX
+ JNE neq // different lengths: not equal
+ MOVL a_base+0(FP), SI
+ MOVL b_base+12(FP), DI
+ CMPL SI, DI
+ JEQ eq // same base pointer and same length: equal without reading memory
+ CALL memeqbody<>(SB) // result comes back in AX (0 or 1)
+ MOVB AX, ret+24(FP)
+ RET
+neq:
+ MOVB $0, ret+24(FP)
+ RET
+eq:
+ MOVB $1, ret+24(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-25 // symbol in package bytes; same logic as ·Equal in this file
+ MOVL a_len+4(FP), BX // BX = len(a); also the byte count handed to memeqbody
+ MOVL b_len+16(FP), CX // CX = len(b)
+ CMPL BX, CX
+ JNE neq // different lengths: not equal
+ MOVL a_base+0(FP), SI
+ MOVL b_base+12(FP), DI
+ CMPL SI, DI
+ JEQ eq // same base pointer and same length: equal without reading memory
+ CALL memeqbody<>(SB) // result comes back in AX (0 or 1)
+ MOVB AX, ret+24(FP)
+ RET
+neq:
+ MOVB $0, ret+24(FP)
+ RET
+eq:
+ MOVB $1, ret+24(FP)
+ RET
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-17
+ MOVL a+0(FP), SI
+ MOVL b+4(FP), DI
+ CMPL SI, DI
+ JEQ eq // identical pointers: equal regardless of size
+ MOVL size+8(FP), BX // BX = byte count for memeqbody
+ CALL memeqbody<>(SB) // result comes back in AX (0 or 1)
+ MOVB AX, ret+16(FP)
+ RET
+eq:
+ MOVB $1, ret+16(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
+ MOVL a+0(FP), SI
+ MOVL b+4(FP), DI
+ CMPL SI, DI
+ JEQ eq // identical pointers: equal, size never needs to be read
+ MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
+ CALL memeqbody<>(SB) // result comes back in AX (0 or 1)
+ MOVB AX, ret+8(FP)
+ RET
+eq:
+ MOVB $1, ret+8(FP)
+ RET
+
+// a in SI
+// b in DI
+// count in BX
+// result is returned in AX: 1 if the BX bytes at SI and DI match, else 0
+TEXT memeqbody<>(SB),NOSPLIT,$0-0
+ XORQ AX, AX // default result 0; the bare RETs below report "not equal"
+
+ CMPQ BX, $8
+ JB small // fewer than 8 bytes: handled with a single masked 64-bit compare
+
+ // 64 bytes at a time using xmm registers
+hugeloop:
+ CMPQ BX, $64
+ JB bigloop
+ MOVOU (SI), X0
+ MOVOU (DI), X1
+ MOVOU 16(SI), X2
+ MOVOU 16(DI), X3
+ MOVOU 32(SI), X4
+ MOVOU 32(DI), X5
+ MOVOU 48(SI), X6
+ MOVOU 48(DI), X7
+ PCMPEQB X1, X0 // per-byte equality masks for each 16-byte pair
+ PCMPEQB X3, X2
+ PCMPEQB X5, X4
+ PCMPEQB X7, X6
+ PAND X2, X0 // fold the four masks into X0
+ PAND X6, X4
+ PAND X4, X0
+ PMOVMSKB X0, DX // DX = one bit per byte lane
+ ADDQ $64, SI
+ ADDQ $64, DI
+ SUBQ $64, BX
+ CMPL DX, $0xffff // all 16 bits set means all 64 bytes matched
+ JEQ hugeloop
+ RET // mismatch: AX is still 0
+
+ // 8 bytes at a time using 64-bit register
+bigloop:
+ CMPQ BX, $8
+ JBE leftover
+ MOVQ (SI), CX
+ MOVQ (DI), DX
+ ADDQ $8, SI
+ ADDQ $8, DI
+ SUBQ $8, BX
+ CMPQ CX, DX
+ JEQ bigloop
+ RET // mismatch: AX is still 0
+
+ // remaining 0-8 bytes
+leftover:
+ ADDQ BX, SI // SI, DI now point just past the end of each buffer
+ ADDQ BX, DI
+ MOVQ -8(SI), CX // final 8 bytes; may overlap bytes already compared
+ MOVQ -8(DI), DX
+ CMPQ CX, DX
+ SETEQ AX
+ RET
+
+small:
+ CMPQ BX, $0
+ JEQ equal // zero length: ZF is set, so SETEQ at equal yields 1
+
+ LEAQ 0(BX*8), CX // CX = number of bits to compare
+ NEGQ CX // CX = -bits; used below as a shift count
+
+ CMPB SI, $0xf8 // examine the low address byte only
+ JA si_high
+
+ // load at SI won't cross a page boundary.
+ MOVQ (SI), SI
+ JMP si_finish
+si_high:
+ // address ends in 11111xxx. Load up to bytes we want, move to correct position.
+ MOVQ BX, DX
+ ADDQ SI, DX // DX = SI + count
+ MOVQ -8(DX), SI
+ SHRQ CX, SI
+si_finish:
+
+ // same for DI.
+ CMPB DI, $0xf8
+ JA di_high
+ MOVQ (DI), DI
+ JMP di_finish
+di_high:
+ MOVQ BX, DX
+ ADDQ DI, DX // DX = DI + count
+ MOVQ -8(DX), DI
+ SHRQ CX, DI
+di_finish:
+
+ SUBQ SI, DI // difference of the two loaded words
+ SHLQ CX, DI // shift out the bytes beyond count; ZF is set iff the compared bytes match
+equal:
+ SETEQ AX
+ RET
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// TODO: share code with memequal?
+TEXT ·Equal(SB),NOSPLIT,$0-25 // func Equal(a, b []byte) bool (arm); byte-at-a-time compare
+ MOVW a_len+4(FP), R1
+ MOVW b_len+16(FP), R3
+
+ CMP R1, R3 // unequal lengths are not equal
+ B.NE notequal
+
+ MOVW a_base+0(FP), R0
+ MOVW b_base+12(FP), R2
+ ADD R0, R1 // end
+
+loop:
+ CMP R0, R1
+ B.EQ equal // reached the end
+ MOVBU.P 1(R0), R4 // load one byte from a, post-increment R0
+ MOVBU.P 1(R2), R5 // load one byte from b, post-increment R2
+ CMP R4, R5
+ B.EQ loop // bytes match: keep going; otherwise fall through to notequal
+
+notequal:
+ MOVW $0, R0
+ MOVBU R0, ret+24(FP)
+ RET
+
+equal:
+ MOVW $1, R0
+ MOVBU R0, ret+24(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-25 // symbol in package bytes; same argument layout as ·Equal
+ JMP ·Equal(SB) // tail jump to the bytealg implementation
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-13
+ MOVW a+0(FP), R1
+ MOVW b+4(FP), R2
+ MOVW size+8(FP), R3
+ ADD R1, R3, R6 // R6 = a + size, the end pointer for the loop
+ MOVW $1, R0
+ MOVB R0, ret+12(FP) // preset the result to true; overwritten below on a mismatch
+ CMP R1, R2
+ RET.EQ // identical pointers: result is already true
+loop:
+ CMP R1, R6
+ RET.EQ // reached the end with no mismatch: result is already true
+ MOVBU.P 1(R1), R4 // load one byte from a, post-increment R1
+ MOVBU.P 1(R2), R5 // load one byte from b, post-increment R2
+ CMP R4, R5
+ BEQ loop
+
+ MOVW $0, R0 // mismatch
+ MOVB R0, ret+12(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$16-9
+ MOVW a+0(FP), R0
+ MOVW b+4(FP), R1
+ CMP R0, R1
+ BEQ eq // identical pointers: equal, size never needs to be read
+ MOVW 4(R7), R2 // compiler stores size at offset 4 in the closure
+ MOVW R0, 4(R13) // outgoing argument a for memequal
+ MOVW R1, 8(R13) // outgoing argument b
+ MOVW R2, 12(R13) // outgoing argument size
+ BL runtime·memequal(SB)
+ MOVB 16(R13), R0 // memequal's bool result
+ MOVB R0, ret+8(FP)
+ RET
+eq:
+ MOVW $1, R0
+ MOVB R0, ret+8(FP)
+ RET
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Equal(SB),NOSPLIT,$0-49 // func Equal(a, b []byte) bool (arm64)
+ MOVD a_len+8(FP), R1 // R1 = len(a); also the data length handed to memeqbody
+ MOVD b_len+32(FP), R3
+ CMP R1, R3
+ // unequal lengths are not equal
+ BNE not_equal
+ // short path to handle 0-byte case
+ CBZ R1, equal
+ MOVD a_base+0(FP), R0
+ MOVD b_base+24(FP), R2
+ MOVD $ret+48(FP), R8 // memeqbody writes the result byte through R8
+ B memeqbody<>(SB) // tail call: memeqbody's RET returns to our caller
+equal:
+ MOVD $1, R0
+ MOVB R0, ret+48(FP)
+ RET
+not_equal:
+ MOVB ZR, ret+48(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-49 // symbol in package bytes; same logic as ·Equal in this file
+ MOVD a_len+8(FP), R1 // R1 = len(a); also the data length handed to memeqbody
+ MOVD b_len+32(FP), R3
+ CMP R1, R3
+ // unequal lengths are not equal
+ BNE not_equal
+ // short path to handle 0-byte case
+ CBZ R1, equal
+ MOVD a_base+0(FP), R0
+ MOVD b_base+24(FP), R2
+ MOVD $ret+48(FP), R8 // memeqbody writes the result byte through R8
+ B memeqbody<>(SB) // tail call: memeqbody's RET returns to our caller
+equal:
+ MOVD $1, R0
+ MOVB R0, ret+48(FP)
+ RET
+not_equal:
+ MOVB ZR, ret+48(FP)
+ RET
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+ MOVD size+16(FP), R1 // R1 = data length for memeqbody
+ // short path to handle 0-byte case
+ CBZ R1, equal
+ MOVD a+0(FP), R0
+ MOVD b+8(FP), R2
+ MOVD $ret+24(FP), R8 // memeqbody writes the result byte through R8
+ B memeqbody<>(SB) // tail call
+equal:
+ MOVD $1, R0
+ MOVB R0, ret+24(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R4
+ CMP R3, R4
+ BEQ eq // identical pointers: equal, size never needs to be read
+ MOVD 8(R26), R5 // compiler stores size at offset 8 in the closure
+ MOVD R3, 8(RSP) // outgoing argument a for memequal
+ MOVD R4, 16(RSP) // outgoing argument b
+ MOVD R5, 24(RSP) // outgoing argument size
+ BL runtime·memequal(SB)
+ MOVBU 32(RSP), R3 // memequal's bool result
+ MOVB R3, ret+16(FP)
+ RET
+eq:
+ MOVD $1, R3
+ MOVB R3, ret+16(FP)
+ RET
+
+// input:
+// R0: pointer a
+// R1: data len
+// R2: pointer b
+// R8: address to put result (1 is stored if the buffers match, 0 otherwise)
+TEXT memeqbody<>(SB),NOSPLIT,$0
+ CMP $1, R1
+ // handle 1-byte special case for better performance
+ BEQ one
+ CMP $16, R1
+ // handle specially if length < 16
+ BLO tail
+ BIC $0x3f, R1, R3 // R3 = len rounded down to a multiple of 64
+ CBZ R3, chunk16
+ // work with 64-byte chunks
+ ADD R3, R0, R6 // end of chunks
+chunk64_loop:
+ VLD1.P (R0), [V0.D2, V1.D2, V2.D2, V3.D2] // 64 bytes of a, post-increment R0
+ VLD1.P (R2), [V4.D2, V5.D2, V6.D2, V7.D2] // 64 bytes of b, post-increment R2
+ VCMEQ V0.D2, V4.D2, V8.D2 // per 64-bit lane equality masks
+ VCMEQ V1.D2, V5.D2, V9.D2
+ VCMEQ V2.D2, V6.D2, V10.D2
+ VCMEQ V3.D2, V7.D2, V11.D2
+ VAND V8.B16, V9.B16, V8.B16 // fold the four masks into V8
+ VAND V8.B16, V10.B16, V8.B16
+ VAND V8.B16, V11.B16, V8.B16
+ CMP R0, R6 // end-of-chunks test; its flags are consumed by the BNE below
+ VMOV V8.D[0], R4
+ VMOV V8.D[1], R5
+ CBZ R4, not_equal // a zero lane means some 64-bit word differed
+ CBZ R5, not_equal
+ BNE chunk64_loop
+ AND $0x3f, R1, R1 // R1 = bytes left after the 64-byte chunks
+ CBZ R1, equal
+chunk16:
+ // work with 16-byte chunks
+ BIC $0xf, R1, R3 // R3 = remaining len rounded down to a multiple of 16
+ CBZ R3, tail
+ ADD R3, R0, R6 // end of chunks
+chunk16_loop:
+ VLD1.P (R0), [V0.D2]
+ VLD1.P (R2), [V1.D2]
+ VCMEQ V0.D2, V1.D2, V2.D2
+ CMP R0, R6 // end-of-chunks test; its flags are consumed by the BNE below
+ VMOV V2.D[0], R4
+ VMOV V2.D[1], R5
+ CBZ R4, not_equal
+ CBZ R5, not_equal
+ BNE chunk16_loop
+ AND $0xf, R1, R1 // R1 = bytes left after the 16-byte chunks
+ CBZ R1, equal
+tail:
+ // special compare of tail with length < 16
+ TBZ $3, R1, lt_8 // bit 3 of len clear: no 8-byte piece
+ MOVD.P 8(R0), R4
+ MOVD.P 8(R2), R5
+ CMP R4, R5
+ BNE not_equal
+lt_8:
+ TBZ $2, R1, lt_4 // bit 2 of len clear: no 4-byte piece
+ MOVWU.P 4(R0), R4
+ MOVWU.P 4(R2), R5
+ CMP R4, R5
+ BNE not_equal
+lt_4:
+ TBZ $1, R1, lt_2 // bit 1 of len clear: no 2-byte piece
+ MOVHU.P 2(R0), R4
+ MOVHU.P 2(R2), R5
+ CMP R4, R5
+ BNE not_equal
+lt_2:
+ TBZ $0, R1, equal // bit 0 of len clear: nothing left to compare
+one:
+ MOVBU (R0), R4
+ MOVBU (R2), R5
+ CMP R4, R5
+ BNE not_equal
+equal:
+ MOVD $1, R0
+ MOVB R0, (R8)
+ RET
+not_equal:
+ MOVB ZR, (R8)
+ RET
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips64 mips64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+#define REGCTXT R22
+
+TEXT ·Equal(SB),NOSPLIT,$0-49 // func Equal(a, b []byte) bool (mips64x); byte-at-a-time compare
+ MOVV a_len+8(FP), R3
+ MOVV b_len+32(FP), R4
+ BNE R3, R4, noteq // unequal lengths are not equal
+
+ MOVV a_base+0(FP), R1
+ MOVV b_base+24(FP), R2
+ ADDV R1, R3 // end
+
+loop:
+ BEQ R1, R3, equal // reached the end
+ MOVBU (R1), R6
+ ADDV $1, R1
+ MOVBU (R2), R7
+ ADDV $1, R2
+ BEQ R6, R7, loop // bytes match: keep going; otherwise fall through to noteq
+
+noteq:
+ MOVB R0, ret+48(FP) // R0 is the zero register: store false
+ RET
+
+equal:
+ MOVV $1, R1
+ MOVB R1, ret+48(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-49 // symbol in package bytes; same argument layout as ·Equal
+ JMP ·Equal(SB) // tail jump to the bytealg implementation
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+ MOVV a+0(FP), R1
+ MOVV b+8(FP), R2
+ BEQ R1, R2, eq // identical pointers: equal regardless of size
+ MOVV size+16(FP), R3
+ ADDV R1, R3, R4 // R4 = a + size, the end pointer for the loop
+loop:
+ BNE R1, R4, test // not at the end yet: compare another byte
+ MOVV $1, R1 // reached the end with no mismatch
+ MOVB R1, ret+24(FP)
+ RET
+test:
+ MOVBU (R1), R6
+ ADDV $1, R1
+ MOVBU (R2), R7
+ ADDV $1, R2
+ BEQ R6, R7, loop
+
+ MOVB R0, ret+24(FP) // mismatch; R0 is the zero register: store false
+ RET
+eq:
+ MOVV $1, R1
+ MOVB R1, ret+24(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+ MOVV a+0(FP), R1
+ MOVV b+8(FP), R2
+ BEQ R1, R2, eq // identical pointers: equal, size never needs to be read
+ MOVV 8(REGCTXT), R3 // compiler stores size at offset 8 in the closure
+ MOVV R1, 8(R29) // outgoing argument a for memequal
+ MOVV R2, 16(R29) // outgoing argument b
+ MOVV R3, 24(R29) // outgoing argument size
+ JAL runtime·memequal(SB)
+ MOVBU 32(R29), R1 // memequal's bool result
+ MOVB R1, ret+16(FP)
+ RET
+eq:
+ MOVV $1, R1
+ MOVB R1, ret+16(FP)
+ RET
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build mips mipsle
+
+#include "go_asm.h"
+#include "textflag.h"
+
+#define REGCTXT R22
+
+TEXT ·Equal(SB),NOSPLIT,$0-25 // func Equal(a, b []byte) bool (mipsx); byte-at-a-time compare
+ MOVW a_len+4(FP), R3
+ MOVW b_len+16(FP), R4
+ BNE R3, R4, noteq // unequal lengths are not equal
+
+ MOVW a_base+0(FP), R1
+ MOVW b_base+12(FP), R2
+ ADDU R1, R3 // end
+
+loop:
+ BEQ R1, R3, equal // reached the end
+ MOVBU (R1), R6
+ ADDU $1, R1
+ MOVBU (R2), R7
+ ADDU $1, R2
+ BEQ R6, R7, loop // bytes match: keep going; otherwise fall through to noteq
+
+noteq:
+ MOVB R0, ret+24(FP) // R0 is the zero register: store false
+ RET
+
+equal:
+ MOVW $1, R1
+ MOVB R1, ret+24(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT,$0-25 // symbol in package bytes; same argument layout as ·Equal
+ JMP ·Equal(SB) // tail jump to the bytealg implementation
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-13
+ MOVW a+0(FP), R1
+ MOVW b+4(FP), R2
+ BEQ R1, R2, eq // identical pointers: equal regardless of size
+ MOVW size+8(FP), R3
+ ADDU R1, R3, R4 // R4 = a + size, the end pointer for the loop
+loop:
+ BNE R1, R4, test // not at the end yet: compare another byte
+ MOVW $1, R1 // reached the end with no mismatch
+ MOVB R1, ret+12(FP)
+ RET
+test:
+ MOVBU (R1), R6
+ ADDU $1, R1
+ MOVBU (R2), R7
+ ADDU $1, R2
+ BEQ R6, R7, loop
+
+ MOVB R0, ret+12(FP) // mismatch; R0 is the zero register: store false
+ RET
+eq:
+ MOVW $1, R1
+ MOVB R1, ret+12(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
+ MOVW a+0(FP), R1
+ MOVW b+4(FP), R2
+ BEQ R1, R2, eq // identical pointers: equal, size never needs to be read
+ MOVW 4(REGCTXT), R3 // compiler stores size at offset 4 in the closure
+ ADDU R1, R3, R4 // R4 = a + size, the end pointer for the loop
+loop:
+ BNE R1, R4, test // not at the end yet: compare another byte
+ MOVW $1, R1 // reached the end with no mismatch
+ MOVB R1, ret+8(FP)
+ RET
+test:
+ MOVBU (R1), R6
+ ADDU $1, R1
+ MOVBU (R2), R7
+ ADDU $1, R2
+ BEQ R6, R7, loop
+
+ MOVB R0, ret+8(FP) // mismatch; R0 is the zero register: store false
+ RET
+eq:
+ MOVW $1, R1
+ MOVB R1, ret+8(FP)
+ RET
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bytealg
+
+import (
+ "internal/cpu"
+ "unsafe"
+)
+
+// Note: there's no equal_generic.go because every platform must implement at least memequal_varlen in assembly.
+
+// Because equal_native.go is unconditional, it's a good place to compute asm constants.
+// TODO: find a better way to do this?
+
+// Offsets into internal/cpu records for use in assembly.
+const x86_HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
+const x86_HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
+const s390x_HasVX = unsafe.Offsetof(cpu.S390X.HasVX)
+
+// Equal reports whether a and b have the same length and contain the same bytes.
+// It has no Go body; each architecture provides it in an equal_*.s file.
+//go:noescape
+func Equal(a, b []byte) bool
+
+// The compiler generates calls to runtime.memequal and runtime.memequal_varlen.
+// In addition, the runtime calls runtime.memequal explicitly.
+// Those functions are implemented in this package.
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include "go_asm.h"
+#include "textflag.h"
+
+// func Equal(a, b []byte) bool
+//
+// Stores 1 in ret if a and b have the same length and the same bytes,
+// otherwise 0. Slices with the same base pointer and length are reported
+// equal without reading memory, matching memequal_varlen in this file.
+TEXT ·Equal(SB),NOSPLIT,$0-49
+ MOVD a_len+8(FP), R4
+ MOVD b_len+32(FP), R5
+ CMP R5, R4 // unequal lengths are not equal
+ BNE noteq
+ MOVD a_base+0(FP), R3
+ MOVD b_base+24(FP), R4
+ CMP R3, R4 // same base and same length: equal
+ BEQ equal
+ BL memeqbody<>(SB) // R3 = a, R4 = b, R5 = len; result in R9
+
+ MOVBZ R9,ret+48(FP)
+ RET
+
+noteq:
+ MOVBZ $0,ret+48(FP)
+ RET
+
+equal:
+ MOVD $1,R3
+ MOVBZ R3,ret+48(FP)
+ RET
+
+// bytes·Equal is the symbol for Equal in package bytes: func Equal(a, b []byte) bool.
+//
+// Stores 1 in ret if a and b have the same length and the same bytes,
+// otherwise 0. Slices with the same base pointer and length are reported
+// equal without reading memory, matching memequal_varlen in this file.
+TEXT bytes·Equal(SB),NOSPLIT,$0-49
+ MOVD a_len+8(FP), R4
+ MOVD b_len+32(FP), R5
+ CMP R5, R4 // unequal lengths are not equal
+ BNE noteq
+ MOVD a_base+0(FP), R3
+ MOVD b_base+24(FP), R4
+ CMP R3, R4 // same base and same length: equal
+ BEQ equal
+ BL memeqbody<>(SB) // R3 = a, R4 = b, R5 = len; result in R9
+
+ MOVBZ R9,ret+48(FP)
+ RET
+
+noteq:
+ MOVBZ $0,ret+48(FP)
+ RET
+
+equal:
+ MOVD $1,R3
+ MOVBZ R3,ret+48(FP)
+ RET
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT,$0-25
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R4
+ MOVD size+16(FP), R5 // R5 = byte count for memeqbody
+
+ BL memeqbody<>(SB) // result comes back in R9
+ MOVB R9, ret+24(FP)
+ RET
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R4
+ CMP R3, R4
+ BEQ eq // identical pointers: equal, size never needs to be read
+ MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure
+ BL memeqbody<>(SB) // result comes back in R9
+ MOVB R9, ret+16(FP)
+ RET
+eq:
+ MOVD $1, R3
+ MOVB R3, ret+16(FP)
+ RET
+
+// Do an efficient memequal for ppc64
+// R3 = s1
+// R4 = s2
+// R5 = len
+// R9 = return value (1 if equal, 0 if not; also used as scratch inside the 32-byte loop)
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+ MOVD R5,CTR // byte count for the simple loop when len < 8
+ CMP R5,$8 // only optimize >=8
+ BLT simplecheck
+ DCBT (R3) // cache hint
+ DCBT (R4)
+ CMP R5,$32 // optimize >= 32
+ MOVD R5,R6 // needed if setup8a branch
+ BLT setup8a // 8 byte moves only
+setup32a: // 8 byte aligned, >= 32 bytes
+ SRADCC $5,R5,R6 // number of 32 byte chunks to compare
+ MOVD R6,CTR
+loop32a:
+ MOVD 0(R3),R6 // doublewords to compare
+ MOVD 0(R4),R7
+ MOVD 8(R3),R8 // second doubleword pair
+ MOVD 8(R4),R9
+ CMP R6,R7 // bytes match?
+ BNE noteq
+ MOVD 16(R3),R6
+ MOVD 16(R4),R7
+ CMP R8,R9 // bytes match?
+ MOVD 24(R3),R8
+ MOVD 24(R4),R9
+ BNE noteq
+ CMP R6,R7 // bytes match?
+ BNE noteq
+ ADD $32,R3 // bump up to next 32
+ ADD $32,R4
+ CMP R8,R9 // bytes match?
+ BC 8,2,loop32a // br ctr and cr
+ BNE noteq
+ ANDCC $24,R5,R6 // Any 8 byte chunks?
+ BEQ leftover // and result is 0
+setup8a:
+ SRADCC $3,R6,R6 // get the 8 byte count
+ BEQ leftover // shifted value is 0
+ MOVD R6,CTR
+loop8:
+ MOVD 0(R3),R6 // doublewords to compare
+ ADD $8,R3
+ MOVD 0(R4),R7
+ ADD $8,R4
+ CMP R6,R7 // match?
+ BC 8,2,loop8 // bt ctr <> 0 && cr
+ BNE noteq
+leftover:
+ ANDCC $7,R5,R6 // check for leftover bytes
+ BEQ equal
+ MOVD R6,CTR // leftover byte count for the simple loop
+ BR simple
+simplecheck:
+ CMP R5,$0
+ BEQ equal // zero length compares equal
+simple:
+ MOVBZ 0(R3), R6 // byte-at-a-time compare, CTR iterations
+ ADD $1,R3
+ MOVBZ 0(R4), R7
+ ADD $1,R4
+ CMP R6, R7
+ BNE noteq
+ BC 8,2,simple
+ BNE noteq
+ BR equal
+noteq:
+ MOVD $0, R9
+ RET
+equal:
+ MOVD $1, R9
+ RET
+
--- /dev/null
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "go_asm.h"
+#include "textflag.h"
+
+TEXT ·Equal(SB),NOSPLIT|NOFRAME,$0-49 // func Equal(a, b []byte) bool (s390x)
+ MOVD a_len+8(FP), R2
+ MOVD b_len+32(FP), R6 // R6 = len(b); equals len(a) whenever memeqbody is reached
+ MOVD a_base+0(FP), R3
+ MOVD b_base+24(FP), R5
+ LA ret+48(FP), R7 // memeqbody writes the result byte through R7
+ CMPBNE R2, R6, notequal // unequal lengths are not equal
+ BR memeqbody<>(SB) // tail call
+notequal:
+ MOVB $0, ret+48(FP)
+ RET
+
+TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49 // symbol in package bytes; same logic as ·Equal in this file
+ MOVD a_len+8(FP), R2
+ MOVD b_len+32(FP), R6 // R6 = len(b); equals len(a) whenever memeqbody is reached
+ MOVD a_base+0(FP), R3
+ MOVD b_base+24(FP), R5
+ LA ret+48(FP), R7 // memeqbody writes the result byte through R7
+ CMPBNE R2, R6, notequal // unequal lengths are not equal
+ BR memeqbody<>(SB) // tail call
+notequal:
+ MOVB $0, ret+48(FP)
+ RET
+
+// memequal(a, b unsafe.Pointer, size uintptr) bool
+TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R5
+ MOVD size+16(FP), R6 // R6 = byte count for memeqbody
+ LA ret+24(FP), R7 // R7 = address of the result byte
+ BR memeqbody<>(SB) // tail call; memeqbody also handles a == b
+
+// memequal_varlen(a, b unsafe.Pointer) bool
+TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
+ MOVD a+0(FP), R3
+ MOVD b+8(FP), R5
+ MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure
+ LA ret+16(FP), R7 // R7 = address of the result byte
+ BR memeqbody<>(SB) // tail call; memeqbody also handles a == b
+
+// input:
+// R3 = a
+// R5 = b
+// R6 = len
+// R7 = address of output byte (stores 0 or 1 here)
+// a and b have the same length
+TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
+ CMPBEQ R3, R5, equal // identical pointers: equal without reading memory
+loop:
+ CMPBEQ R6, $0, equal // nothing left to compare
+ CMPBLT R6, $32, tiny
+ CMP R6, $256
+ BLT tail
+ CLC $256, 0(R3), 0(R5) // compare 256 bytes with one instruction
+ BNE notequal
+ SUB $256, R6
+ LA 256(R3), R3
+ LA 256(R5), R5
+ BR loop
+tail:
+ SUB $1, R6, R8 // R8 = remaining length - 1
+ EXRL $memeqbodyclc<>(SB), R8 // run the CLC in memeqbodyclc<> with R8 supplying its length
+ BEQ equal
+notequal:
+ MOVB $0, 0(R7)
+ RET
+equal:
+ MOVB $1, 0(R7)
+ RET
+tiny:
+ MOVD $0, R2 // R2 = offset of the next bytes to compare
+ CMPBLT R6, $16, lt16
+ MOVD 0(R3), R8
+ MOVD 0(R5), R9
+ CMPBNE R8, R9, notequal
+ MOVD 8(R3), R8
+ MOVD 8(R5), R9
+ CMPBNE R8, R9, notequal
+ LA 16(R2), R2
+ SUB $16, R6
+lt16:
+ CMPBLT R6, $8, lt8
+ MOVD 0(R3)(R2*1), R8
+ MOVD 0(R5)(R2*1), R9
+ CMPBNE R8, R9, notequal
+ LA 8(R2), R2
+ SUB $8, R6
+lt8:
+ CMPBLT R6, $4, lt4
+ MOVWZ 0(R3)(R2*1), R8
+ MOVWZ 0(R5)(R2*1), R9
+ CMPBNE R8, R9, notequal
+ LA 4(R2), R2
+ SUB $4, R6
+lt4: // fewer than 4 bytes remain; CHECK(n) stops with equal when R6 == n, else compares byte n
+#define CHECK(n) \
+ CMPBEQ R6, $n, equal \
+ MOVB n(R3)(R2*1), R8 \
+ MOVB n(R5)(R2*1), R9 \
+ CMPBNE R8, R9, notequal
+ CHECK(0)
+ CHECK(1)
+ CHECK(2)
+ CHECK(3)
+ BR equal
+
+TEXT memeqbodyclc<>(SB),NOSPLIT|NOFRAME,$0-0 // CLC template referenced by the EXRL in memeqbody<>
+ CLC $1, 0(R3), 0(R5) // compares bytes at R3 and R5; length as written is 1
+ RET
package bytealg
-import (
- "internal/cpu"
- "unsafe"
-)
-
-// Offsets into internal/cpu records for use in assembly
-// TODO: find a better way to do this?
-const x86_HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
-const s390x_HasVX = unsafe.Offsetof(cpu.S390X.HasVX)
-
//go:noescape
func IndexByte(b []byte, c byte) int
SETEQ ret+0(FP)
RET
-// memequal(p, q unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT,$0-13
- MOVL a+0(FP), SI
- MOVL b+4(FP), DI
- CMPL SI, DI
- JEQ eq
- MOVL size+8(FP), BX
- LEAL ret+12(FP), AX
- JMP runtime·memeqbody(SB)
-eq:
- MOVB $1, ret+12(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
- MOVL a+0(FP), SI
- MOVL b+4(FP), DI
- CMPL SI, DI
- JEQ eq
- MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
- LEAL ret+8(FP), AX
- JMP runtime·memeqbody(SB)
-eq:
- MOVB $1, ret+8(FP)
- RET
-
-TEXT bytes·Equal(SB),NOSPLIT,$0-25
- MOVL a_len+4(FP), BX
- MOVL b_len+16(FP), CX
- CMPL BX, CX
- JNE eqret
- MOVL a+0(FP), SI
- MOVL b+12(FP), DI
- LEAL ret+24(FP), AX
- JMP runtime·memeqbody(SB)
-eqret:
- MOVB $0, ret+24(FP)
- RET
-
-// a in SI
-// b in DI
-// count in BX
-// address of result byte in AX
-TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
- CMPL BX, $4
- JB small
-
- // 64 bytes at a time using xmm registers
-hugeloop:
- CMPL BX, $64
- JB bigloop
- CMPB runtime·support_sse2(SB), $1
- JNE bigloop
- MOVOU (SI), X0
- MOVOU (DI), X1
- MOVOU 16(SI), X2
- MOVOU 16(DI), X3
- MOVOU 32(SI), X4
- MOVOU 32(DI), X5
- MOVOU 48(SI), X6
- MOVOU 48(DI), X7
- PCMPEQB X1, X0
- PCMPEQB X3, X2
- PCMPEQB X5, X4
- PCMPEQB X7, X6
- PAND X2, X0
- PAND X6, X4
- PAND X4, X0
- PMOVMSKB X0, DX
- ADDL $64, SI
- ADDL $64, DI
- SUBL $64, BX
- CMPL DX, $0xffff
- JEQ hugeloop
- MOVB $0, (AX)
- RET
-
- // 4 bytes at a time using 32-bit register
-bigloop:
- CMPL BX, $4
- JBE leftover
- MOVL (SI), CX
- MOVL (DI), DX
- ADDL $4, SI
- ADDL $4, DI
- SUBL $4, BX
- CMPL CX, DX
- JEQ bigloop
- MOVB $0, (AX)
- RET
-
- // remaining 0-4 bytes
-leftover:
- MOVL -4(SI)(BX*1), CX
- MOVL -4(DI)(BX*1), DX
- CMPL CX, DX
- SETEQ (AX)
- RET
-
-small:
- CMPL BX, $0
- JEQ equal
-
- LEAL 0(BX*8), CX
- NEGL CX
-
- MOVL SI, DX
- CMPB DX, $0xfc
- JA si_high
-
- // load at SI won't cross a page boundary.
- MOVL (SI), SI
- JMP si_finish
-si_high:
- // address ends in 111111xx. Load up to bytes we want, move to correct position.
- MOVL -4(SI)(BX*1), SI
- SHRL CX, SI
-si_finish:
-
- // same for DI.
- MOVL DI, DX
- CMPB DX, $0xfc
- JA di_high
- MOVL (DI), DI
- JMP di_finish
-di_high:
- MOVL -4(DI)(BX*1), DI
- SHRL CX, DI
-di_finish:
-
- SUBL SI, DI
- SHLL CX, DI
-equal:
- SETEQ (AX)
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL s1_base+0(FP), SI
MOVL s1_len+4(FP), BX
DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
GLOBL shifts<>(SB),RODATA,$256
-// memequal(p, q unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT,$0-25
- MOVQ a+0(FP), SI
- MOVQ b+8(FP), DI
- CMPQ SI, DI
- JEQ eq
- MOVQ size+16(FP), BX
- LEAQ ret+24(FP), AX
- JMP runtime·memeqbody(SB)
-eq:
- MOVB $1, ret+24(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-17
- MOVQ a+0(FP), SI
- MOVQ b+8(FP), DI
- CMPQ SI, DI
- JEQ eq
- MOVQ 8(DX), BX // compiler stores size at offset 8 in the closure
- LEAQ ret+16(FP), AX
- JMP runtime·memeqbody(SB)
-eq:
- MOVB $1, ret+16(FP)
- RET
-
-// a in SI
-// b in DI
-// count in BX
-// address of result byte in AX
-TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
- CMPQ BX, $8
- JB small
- CMPQ BX, $64
- JB bigloop
- CMPB runtime·support_avx2(SB), $1
- JE hugeloop_avx2
-
- // 64 bytes at a time using xmm registers
-hugeloop:
- CMPQ BX, $64
- JB bigloop
- MOVOU (SI), X0
- MOVOU (DI), X1
- MOVOU 16(SI), X2
- MOVOU 16(DI), X3
- MOVOU 32(SI), X4
- MOVOU 32(DI), X5
- MOVOU 48(SI), X6
- MOVOU 48(DI), X7
- PCMPEQB X1, X0
- PCMPEQB X3, X2
- PCMPEQB X5, X4
- PCMPEQB X7, X6
- PAND X2, X0
- PAND X6, X4
- PAND X4, X0
- PMOVMSKB X0, DX
- ADDQ $64, SI
- ADDQ $64, DI
- SUBQ $64, BX
- CMPL DX, $0xffff
- JEQ hugeloop
- MOVB $0, (AX)
- RET
-
- // 64 bytes at a time using ymm registers
-hugeloop_avx2:
- CMPQ BX, $64
- JB bigloop_avx2
- VMOVDQU (SI), Y0
- VMOVDQU (DI), Y1
- VMOVDQU 32(SI), Y2
- VMOVDQU 32(DI), Y3
- VPCMPEQB Y1, Y0, Y4
- VPCMPEQB Y2, Y3, Y5
- VPAND Y4, Y5, Y6
- VPMOVMSKB Y6, DX
- ADDQ $64, SI
- ADDQ $64, DI
- SUBQ $64, BX
- CMPL DX, $0xffffffff
- JEQ hugeloop_avx2
- VZEROUPPER
- MOVB $0, (AX)
- RET
-
-bigloop_avx2:
- VZEROUPPER
-
- // 8 bytes at a time using 64-bit register
-bigloop:
- CMPQ BX, $8
- JBE leftover
- MOVQ (SI), CX
- MOVQ (DI), DX
- ADDQ $8, SI
- ADDQ $8, DI
- SUBQ $8, BX
- CMPQ CX, DX
- JEQ bigloop
- MOVB $0, (AX)
- RET
-
- // remaining 0-8 bytes
-leftover:
- MOVQ -8(SI)(BX*1), CX
- MOVQ -8(DI)(BX*1), DX
- CMPQ CX, DX
- SETEQ (AX)
- RET
-
-small:
- CMPQ BX, $0
- JEQ equal
-
- LEAQ 0(BX*8), CX
- NEGQ CX
-
- CMPB SI, $0xf8
- JA si_high
-
- // load at SI won't cross a page boundary.
- MOVQ (SI), SI
- JMP si_finish
-si_high:
- // address ends in 11111xxx. Load up to bytes we want, move to correct position.
- MOVQ -8(SI)(BX*1), SI
- SHRQ CX, SI
-si_finish:
-
- // same for DI.
- CMPB DI, $0xf8
- JA di_high
- MOVQ (DI), DI
- JMP di_finish
-di_high:
- MOVQ -8(DI)(BX*1), DI
- SHRQ CX, DI
-di_finish:
-
- SUBQ SI, DI
- SHLQ CX, DI
-equal:
- SETEQ (AX)
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
MOVQ s1_base+0(FP), SI
MOVQ s1_len+8(FP), BX
MOVQ DI, (R11)
RET
-TEXT bytes·Equal(SB),NOSPLIT,$0-49
- MOVQ a_len+8(FP), BX
- MOVQ b_len+32(FP), CX
- CMPQ BX, CX
- JNE eqret
- MOVQ a+0(FP), SI
- MOVQ b+24(FP), DI
- LEAQ ret+48(FP), AX
- JMP runtime·memeqbody(SB)
-eqret:
- MOVB $0, ret+48(FP)
- RET
-
-
TEXT bytes·countByte(SB),NOSPLIT,$0-40
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), BX
MOVL AX, ret+8(FP)
RET
-// memequal(p, q unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT,$0-17
- MOVL a+0(FP), SI
- MOVL b+4(FP), DI
- CMPL SI, DI
- JEQ eq
- MOVL size+8(FP), BX
- CALL runtime·memeqbody(SB)
- MOVB AX, ret+16(FP)
- RET
-eq:
- MOVB $1, ret+16(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
- MOVL a+0(FP), SI
- MOVL b+4(FP), DI
- CMPL SI, DI
- JEQ eq
- MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
- CALL runtime·memeqbody(SB)
- MOVB AX, ret+8(FP)
- RET
-eq:
- MOVB $1, ret+8(FP)
- RET
-
-// a in SI
-// b in DI
-// count in BX
-TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
- XORQ AX, AX
-
- CMPQ BX, $8
- JB small
-
- // 64 bytes at a time using xmm registers
-hugeloop:
- CMPQ BX, $64
- JB bigloop
- MOVOU (SI), X0
- MOVOU (DI), X1
- MOVOU 16(SI), X2
- MOVOU 16(DI), X3
- MOVOU 32(SI), X4
- MOVOU 32(DI), X5
- MOVOU 48(SI), X6
- MOVOU 48(DI), X7
- PCMPEQB X1, X0
- PCMPEQB X3, X2
- PCMPEQB X5, X4
- PCMPEQB X7, X6
- PAND X2, X0
- PAND X6, X4
- PAND X4, X0
- PMOVMSKB X0, DX
- ADDQ $64, SI
- ADDQ $64, DI
- SUBQ $64, BX
- CMPL DX, $0xffff
- JEQ hugeloop
- RET
-
- // 8 bytes at a time using 64-bit register
-bigloop:
- CMPQ BX, $8
- JBE leftover
- MOVQ (SI), CX
- MOVQ (DI), DX
- ADDQ $8, SI
- ADDQ $8, DI
- SUBQ $8, BX
- CMPQ CX, DX
- JEQ bigloop
- RET
-
- // remaining 0-8 bytes
-leftover:
- ADDQ BX, SI
- ADDQ BX, DI
- MOVQ -8(SI), CX
- MOVQ -8(DI), DX
- CMPQ CX, DX
- SETEQ AX
- RET
-
-small:
- CMPQ BX, $0
- JEQ equal
-
- LEAQ 0(BX*8), CX
- NEGQ CX
-
- CMPB SI, $0xf8
- JA si_high
-
- // load at SI won't cross a page boundary.
- MOVQ (SI), SI
- JMP si_finish
-si_high:
- // address ends in 11111xxx. Load up to bytes we want, move to correct position.
- MOVQ BX, DX
- ADDQ SI, DX
- MOVQ -8(DX), SI
- SHRQ CX, SI
-si_finish:
-
- // same for DI.
- CMPB DI, $0xf8
- JA di_high
- MOVQ (DI), DI
- JMP di_finish
-di_high:
- MOVQ BX, DX
- ADDQ DI, DX
- MOVQ -8(DX), DI
- SHRQ CX, DI
-di_finish:
-
- SUBQ SI, DI
- SHLQ CX, DI
-equal:
- SETEQ AX
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL s1_base+0(FP), SI
MOVL s1_len+4(FP), BX
LEAQ -1(CX)(AX*2), AX // 1,0,-1 result
RET
-TEXT bytes·Equal(SB),NOSPLIT,$0-25
- MOVL a_len+4(FP), BX
- MOVL b_len+16(FP), CX
- XORL AX, AX
- CMPL BX, CX
- JNE eqret
- MOVL a+0(FP), SI
- MOVL b+12(FP), DI
- CALL runtime·memeqbody(SB)
-eqret:
- MOVB AX, ret+24(FP)
- RET
-
TEXT runtime·return0(SB), NOSPLIT, $0
MOVL $0, AX
RET
MOVW $0, R0
MOVW (R0), R1
-// memequal(p, q unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-13
- MOVW a+0(FP), R1
- MOVW b+4(FP), R2
- MOVW size+8(FP), R3
- ADD R1, R3, R6
- MOVW $1, R0
- MOVB R0, ret+12(FP)
- CMP R1, R2
- RET.EQ
-loop:
- CMP R1, R6
- RET.EQ
- MOVBU.P 1(R1), R4
- MOVBU.P 1(R2), R5
- CMP R4, R5
- BEQ loop
-
- MOVW $0, R0
- MOVB R0, ret+12(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$16-9
- MOVW a+0(FP), R0
- MOVW b+4(FP), R1
- CMP R0, R1
- BEQ eq
- MOVW 4(R7), R2 // compiler stores size at offset 4 in the closure
- MOVW R0, 4(R13)
- MOVW R1, 8(R13)
- MOVW R2, 12(R13)
- BL runtime·memequal(SB)
- MOVB 16(R13), R0
- MOVB R0, ret+8(FP)
- RET
-eq:
- MOVW $1, R0
- MOVB R0, ret+8(FP)
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-20
MOVW s1_base+0(FP), R2
MOVW s1_len+4(FP), R0
MOVW R0, (R7)
RET
-// TODO: share code with memequal?
-TEXT bytes·Equal(SB),NOSPLIT,$0-25
- MOVW a_len+4(FP), R1
- MOVW b_len+16(FP), R3
-
- CMP R1, R3 // unequal lengths are not equal
- B.NE notequal
-
- MOVW a+0(FP), R0
- MOVW b+12(FP), R2
- ADD R0, R1 // end
-
-loop:
- CMP R0, R1
- B.EQ equal // reached the end
- MOVBU.P 1(R0), R4
- MOVBU.P 1(R2), R5
- CMP R4, R5
- B.EQ loop
-
-notequal:
- MOVW $0, R0
- MOVBU R0, ret+24(FP)
- RET
-
-equal:
- MOVW $1, R0
- MOVBU R0, ret+24(FP)
- RET
-
TEXT runtime·return0(SB),NOSPLIT,$0
MOVW $0, R0
RET
B (ZR)
UNDEF
-// memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
- MOVD size+16(FP), R1
- // short path to handle 0-byte case
- CBZ R1, equal
- MOVD a+0(FP), R0
- MOVD b+8(FP), R2
- MOVD $ret+24(FP), R8
- B runtime·memeqbody<>(SB)
-equal:
- MOVD $1, R0
- MOVB R0, ret+24(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
- MOVD a+0(FP), R3
- MOVD b+8(FP), R4
- CMP R3, R4
- BEQ eq
- MOVD 8(R26), R5 // compiler stores size at offset 8 in the closure
- MOVD R3, 8(RSP)
- MOVD R4, 16(RSP)
- MOVD R5, 24(RSP)
- BL runtime·memequal(SB)
- MOVBU 32(RSP), R3
- MOVB R3, ret+16(FP)
- RET
-eq:
- MOVD $1, R3
- MOVB R3, ret+16(FP)
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD s1_base+0(FP), R2
MOVD s1_len+8(FP), R0
MOVD R4, (R7)
RET
-//
-// functions for other packages
-//
-
-// Equal(a, b []byte) bool
-TEXT bytes·Equal(SB),NOSPLIT,$0-49
- MOVD a_len+8(FP), R1
- MOVD b_len+32(FP), R3
- CMP R1, R3
- // unequal lengths are not equal
- BNE not_equal
- // short path to handle 0-byte case
- CBZ R1, equal
- MOVD a+0(FP), R0
- MOVD b+24(FP), R2
- MOVD $ret+48(FP), R8
- B runtime·memeqbody<>(SB)
-equal:
- MOVD $1, R0
- MOVB R0, ret+48(FP)
- RET
-not_equal:
- MOVB ZR, ret+48(FP)
- RET
-
-// input:
-// R0: pointer a
-// R1: data len
-// R2: pointer b
-// R8: address to put result
-TEXT runtime·memeqbody<>(SB),NOSPLIT,$0
- CMP $1, R1
- // handle 1-byte special case for better performance
- BEQ one
- CMP $16, R1
- // handle specially if length < 16
- BLO tail
- BIC $0x3f, R1, R3
- CBZ R3, chunk16
- // work with 64-byte chunks
- ADD R3, R0, R6 // end of chunks
-chunk64_loop:
- VLD1.P (R0), [V0.D2, V1.D2, V2.D2, V3.D2]
- VLD1.P (R2), [V4.D2, V5.D2, V6.D2, V7.D2]
- VCMEQ V0.D2, V4.D2, V8.D2
- VCMEQ V1.D2, V5.D2, V9.D2
- VCMEQ V2.D2, V6.D2, V10.D2
- VCMEQ V3.D2, V7.D2, V11.D2
- VAND V8.B16, V9.B16, V8.B16
- VAND V8.B16, V10.B16, V8.B16
- VAND V8.B16, V11.B16, V8.B16
- CMP R0, R6
- VMOV V8.D[0], R4
- VMOV V8.D[1], R5
- CBZ R4, not_equal
- CBZ R5, not_equal
- BNE chunk64_loop
- AND $0x3f, R1, R1
- CBZ R1, equal
-chunk16:
- // work with 16-byte chunks
- BIC $0xf, R1, R3
- CBZ R3, tail
- ADD R3, R0, R6 // end of chunks
-chunk16_loop:
- VLD1.P (R0), [V0.D2]
- VLD1.P (R2), [V1.D2]
- VCMEQ V0.D2, V1.D2, V2.D2
- CMP R0, R6
- VMOV V2.D[0], R4
- VMOV V2.D[1], R5
- CBZ R4, not_equal
- CBZ R5, not_equal
- BNE chunk16_loop
- AND $0xf, R1, R1
- CBZ R1, equal
-tail:
- // special compare of tail with length < 16
- TBZ $3, R1, lt_8
- MOVD.P 8(R0), R4
- MOVD.P 8(R2), R5
- CMP R4, R5
- BNE not_equal
-lt_8:
- TBZ $2, R1, lt_4
- MOVWU.P 4(R0), R4
- MOVWU.P 4(R2), R5
- CMP R4, R5
- BNE not_equal
-lt_4:
- TBZ $1, R1, lt_2
- MOVHU.P 2(R0), R4
- MOVHU.P 2(R2), R5
- CMP R4, R5
- BNE not_equal
-lt_2:
- TBZ $0, R1, equal
-one:
- MOVBU (R0), R4
- MOVBU (R2), R5
- CMP R4, R5
- BNE not_equal
-equal:
- MOVD $1, R0
- MOVB R0, (R8)
- RET
-not_equal:
- MOVB ZR, (R8)
- RET
-
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R0
RET
TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
MOVW (R0), R1
-// memequal(p, q unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
- MOVV a+0(FP), R1
- MOVV b+8(FP), R2
- BEQ R1, R2, eq
- MOVV size+16(FP), R3
- ADDV R1, R3, R4
-loop:
- BNE R1, R4, test
- MOVV $1, R1
- MOVB R1, ret+24(FP)
- RET
-test:
- MOVBU (R1), R6
- ADDV $1, R1
- MOVBU (R2), R7
- ADDV $1, R2
- BEQ R6, R7, loop
-
- MOVB R0, ret+24(FP)
- RET
-eq:
- MOVV $1, R1
- MOVB R1, ret+24(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
- MOVV a+0(FP), R1
- MOVV b+8(FP), R2
- BEQ R1, R2, eq
- MOVV 8(REGCTXT), R3 // compiler stores size at offset 8 in the closure
- MOVV R1, 8(R29)
- MOVV R2, 16(R29)
- MOVV R3, 24(R29)
- JAL runtime·memequal(SB)
- MOVBU 32(R29), R1
- MOVB R1, ret+16(FP)
- RET
-eq:
- MOVV $1, R1
- MOVB R1, ret+16(FP)
- RET
-
-// TODO: share code with memequal?
-TEXT bytes·Equal(SB),NOSPLIT,$0-49
- MOVV a_len+8(FP), R3
- MOVV b_len+32(FP), R4
- BNE R3, R4, noteq // unequal lengths are not equal
-
- MOVV a+0(FP), R1
- MOVV b+24(FP), R2
- ADDV R1, R3 // end
-
-loop:
- BEQ R1, R3, equal // reached the end
- MOVBU (R1), R6
- ADDV $1, R1
- MOVBU (R2), R7
- ADDV $1, R2
- BEQ R6, R7, loop
-
-noteq:
- MOVB R0, ret+48(FP)
- RET
-
-equal:
- MOVV $1, R1
- MOVB R1, ret+48(FP)
- RET
-
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R1
RET
// Not implemented.
TEXT runtime·aeshashstr(SB),NOSPLIT,$0
UNDEF
-
-// memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT,$0-13
- MOVW a+0(FP), R1
- MOVW b+4(FP), R2
- BEQ R1, R2, eq
- MOVW size+8(FP), R3
- ADDU R1, R3, R4
-loop:
- BNE R1, R4, test
- MOVW $1, R1
- MOVB R1, ret+12(FP)
- RET
-test:
- MOVBU (R1), R6
- ADDU $1, R1
- MOVBU (R2), R7
- ADDU $1, R2
- BEQ R6, R7, loop
-
- MOVB R0, ret+12(FP)
- RET
-eq:
- MOVW $1, R1
- MOVB R1, ret+12(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
- MOVW a+0(FP), R1
- MOVW b+4(FP), R2
- BEQ R1, R2, eq
- MOVW 4(REGCTXT), R3 // compiler stores size at offset 4 in the closure
- ADDU R1, R3, R4
-loop:
- BNE R1, R4, test
- MOVW $1, R1
- MOVB R1, ret+8(FP)
- RET
-test:
- MOVBU (R1), R6
- ADDU $1, R1
- MOVBU (R2), R7
- ADDU $1, R2
- BEQ R6, R7, loop
-
- MOVB R0, ret+8(FP)
- RET
-eq:
- MOVW $1, R1
- MOVB R1, ret+8(FP)
- RET
-
-TEXT bytes·Equal(SB),NOSPLIT,$0-25
- MOVW a_len+4(FP), R3
- MOVW b_len+16(FP), R4
- BNE R3, R4, noteq // unequal lengths are not equal
-
- MOVW a+0(FP), R1
- MOVW b+12(FP), R2
- ADDU R1, R3 // end
-
-loop:
- BEQ R1, R3, equal // reached the end
- MOVBU (R1), R6
- ADDU $1, R1
- MOVBU (R2), R7
- ADDU $1, R2
- BEQ R6, R7, loop
-
-noteq:
- MOVB R0, ret+24(FP)
- RET
-
-equal:
- MOVW $1, R1
- MOVB R1, ret+24(FP)
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVW s1_base+0(FP), R3
MOVW s1_len+4(FP), R1
TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
MOVW (R0), R1
-TEXT runtime·memequal(SB),NOSPLIT,$0-25
- MOVD a+0(FP), R3
- MOVD b+8(FP), R4
- MOVD size+16(FP), R5
-
- BL runtime·memeqbody(SB)
- MOVB R9, ret+24(FP)
- RET
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT,$40-17
- MOVD a+0(FP), R3
- MOVD b+8(FP), R4
- CMP R3, R4
- BEQ eq
- MOVD 8(R11), R5 // compiler stores size at offset 8 in the closure
- BL runtime·memeqbody(SB)
- MOVB R9, ret+16(FP)
- RET
-eq:
- MOVD $1, R3
- MOVB R3, ret+16(FP)
- RET
-
// Do an efficient memcmp for ppc64le
// R3 = s1 len
// R4 = s2 len
MOVD R3,(R7) // return value if A > B
RET
-// Do an efficient memequal for ppc64
-// R3 = s1
-// R4 = s2
-// R5 = len
-// R9 = return value
-TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
- MOVD R5,CTR
- CMP R5,$8 // only optimize >=8
- BLT simplecheck
- DCBT (R3) // cache hint
- DCBT (R4)
- CMP R5,$32 // optimize >= 32
- MOVD R5,R6 // needed if setup8a branch
- BLT setup8a // 8 byte moves only
-setup32a: // 8 byte aligned, >= 32 bytes
- SRADCC $5,R5,R6 // number of 32 byte chunks to compare
- MOVD R6,CTR
-loop32a:
- MOVD 0(R3),R6 // doublewords to compare
- MOVD 0(R4),R7
- MOVD 8(R3),R8 //
- MOVD 8(R4),R9
- CMP R6,R7 // bytes batch?
- BNE noteq
- MOVD 16(R3),R6
- MOVD 16(R4),R7
- CMP R8,R9 // bytes match?
- MOVD 24(R3),R8
- MOVD 24(R4),R9
- BNE noteq
- CMP R6,R7 // bytes match?
- BNE noteq
- ADD $32,R3 // bump up to next 32
- ADD $32,R4
- CMP R8,R9 // bytes match?
- BC 8,2,loop32a // br ctr and cr
- BNE noteq
- ANDCC $24,R5,R6 // Any 8 byte chunks?
- BEQ leftover // and result is 0
-setup8a:
- SRADCC $3,R6,R6 // get the 8 byte count
- BEQ leftover // shifted value is 0
- MOVD R6,CTR
-loop8:
- MOVD 0(R3),R6 // doublewords to compare
- ADD $8,R3
- MOVD 0(R4),R7
- ADD $8,R4
- CMP R6,R7 // match?
- BC 8,2,loop8 // bt ctr <> 0 && cr
- BNE noteq
-leftover:
- ANDCC $7,R5,R6 // check for leftover bytes
- BEQ equal
- MOVD R6,CTR
- BR simple
-simplecheck:
- CMP R5,$0
- BEQ equal
-simple:
- MOVBZ 0(R3), R6
- ADD $1,R3
- MOVBZ 0(R4), R7
- ADD $1,R4
- CMP R6, R7
- BNE noteq
- BC 8,2,simple
- BNE noteq
- BR equal
-noteq:
- MOVD $0, R9
- RET
-equal:
- MOVD $1, R9
- RET
-
-TEXT bytes·Equal(SB),NOSPLIT,$0-49
- MOVD a_len+8(FP), R4
- MOVD b_len+32(FP), R5
- CMP R5, R4 // unequal lengths are not equal
- BNE noteq
- MOVD a+0(FP), R3
- MOVD b+24(FP), R4
- BL runtime·memeqbody(SB)
-
- MOVBZ R9,ret+48(FP)
- RET
-
-noteq:
- MOVBZ $0,ret+48(FP)
- RET
-
-equal:
- MOVD $1,R3
- MOVBZ R3,ret+48(FP)
- RET
-
TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
MOVD s1_base+0(FP), R5
MOVD s2_base+16(FP), R6
TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
MOVW (R0), R15
-// memequal(a, b unsafe.Pointer, size uintptr) bool
-TEXT runtime·memequal(SB),NOSPLIT|NOFRAME,$0-25
- MOVD a+0(FP), R3
- MOVD b+8(FP), R5
- MOVD size+16(FP), R6
- LA ret+24(FP), R7
- BR runtime·memeqbody(SB)
-
-// memequal_varlen(a, b unsafe.Pointer) bool
-TEXT runtime·memequal_varlen(SB),NOSPLIT|NOFRAME,$0-17
- MOVD a+0(FP), R3
- MOVD b+8(FP), R5
- MOVD 8(R12), R6 // compiler stores size at offset 8 in the closure
- LA ret+16(FP), R7
- BR runtime·memeqbody(SB)
-
-TEXT bytes·Equal(SB),NOSPLIT|NOFRAME,$0-49
- MOVD a_len+8(FP), R2
- MOVD b_len+32(FP), R6
- MOVD a+0(FP), R3
- MOVD b+24(FP), R5
- LA ret+48(FP), R7
- CMPBNE R2, R6, notequal
- BR runtime·memeqbody(SB)
-notequal:
- MOVB $0, ret+48(FP)
- RET
-
-// input:
-// R3 = a
-// R5 = b
-// R6 = len
-// R7 = address of output byte (stores 0 or 1 here)
-// a and b have the same length
-TEXT runtime·memeqbody(SB),NOSPLIT|NOFRAME,$0-0
- CMPBEQ R3, R5, equal
-loop:
- CMPBEQ R6, $0, equal
- CMPBLT R6, $32, tiny
- CMP R6, $256
- BLT tail
- CLC $256, 0(R3), 0(R5)
- BNE notequal
- SUB $256, R6
- LA 256(R3), R3
- LA 256(R5), R5
- BR loop
-tail:
- SUB $1, R6, R8
- EXRL $runtime·memeqbodyclc(SB), R8
- BEQ equal
-notequal:
- MOVB $0, 0(R7)
- RET
-equal:
- MOVB $1, 0(R7)
- RET
-tiny:
- MOVD $0, R2
- CMPBLT R6, $16, lt16
- MOVD 0(R3), R8
- MOVD 0(R5), R9
- CMPBNE R8, R9, notequal
- MOVD 8(R3), R8
- MOVD 8(R5), R9
- CMPBNE R8, R9, notequal
- LA 16(R2), R2
- SUB $16, R6
-lt16:
- CMPBLT R6, $8, lt8
- MOVD 0(R3)(R2*1), R8
- MOVD 0(R5)(R2*1), R9
- CMPBNE R8, R9, notequal
- LA 8(R2), R2
- SUB $8, R6
-lt8:
- CMPBLT R6, $4, lt4
- MOVWZ 0(R3)(R2*1), R8
- MOVWZ 0(R5)(R2*1), R9
- CMPBNE R8, R9, notequal
- LA 4(R2), R2
- SUB $4, R6
-lt4:
-#define CHECK(n) \
- CMPBEQ R6, $n, equal \
- MOVB n(R3)(R2*1), R8 \
- MOVB n(R5)(R2*1), R9 \
- CMPBNE R8, R9, notequal
- CHECK(0)
- CHECK(1)
- CHECK(2)
- CHECK(3)
- BR equal
-
-TEXT runtime·memeqbodyclc(SB),NOSPLIT|NOFRAME,$0-0
- CLC $1, 0(R3), 0(R5)
- RET
-
TEXT runtime·return0(SB), NOSPLIT, $0
MOVW $0, R3
RET