// license that can be found in the LICENSE file.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
TEXT ·Compare<ABIInternal>(SB),NOSPLIT,$0-56
CMPQ R8, $63
JBE loop
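+// If AVX2 is guaranteed by the GOAMD64 level (hasAVX2, see asm_amd64.h), jump
+// straight to the AVX2 loop; otherwise choose a loop via a runtime internal/cpu check.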
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JEQ big_loop_avx2
JMP big_loop
+#else
+ JMP big_loop_avx2
+#endif
loop:
CMPQ R8, $16
JBE _0through16
RET
// this works for >= 64 bytes of data.
+#ifndef hasAVX2
big_loop:
MOVOU (SI), X0
MOVOU (DI), X1
CMPQ R8, $64
JBE loop
JMP big_loop
+#endif
// Compare 64 bytes per loop iteration.
// Loop is unrolled and uses AVX2.
// license that can be found in the LICENSE file.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
TEXT ·Count(SB),NOSPLIT,$0-40
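+// If POPCNT is not guaranteed by the GOAMD64 level, check for it at runtime
+// and fall back to the generic implementation when it is absent.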
+#ifndef hasPOPCNT
CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGeneric(SB)
+#endif
MOVQ b_base+0(FP), SI
MOVQ b_len+8(FP), BX
MOVB c+24(FP), AL
JMP countbody<>(SB)
TEXT ·CountString(SB),NOSPLIT,$0-32
+#ifndef hasPOPCNT
CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGenericString(SB)
+#endif
MOVQ s_base+0(FP), SI
MOVQ s_len+8(FP), BX
MOVB c+16(FP), AL
RET
avx2:
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
+#endif
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
VPBROADCASTB X0, Y1
// license that can be found in the LICENSE file.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
// memequal(a, b unsafe.Pointer, size uintptr) bool
JB small
CMPQ BX, $64
JB bigloop
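+// With AVX2 guaranteed, both the runtime check and the SSE hugeloop below are
+// compiled out; execution falls through to hugeloop_avx2.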
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE hugeloop_avx2
JEQ hugeloop
XORQ AX, AX // return 0
RET
+#endif
// 64 bytes at a time using ymm registers
hugeloop_avx2:
VZEROUPPER
JMP success
sse42:
+#ifndef hasSSE42
CMPB internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1
JNE no_sse42
+#endif
CMPQ AX, $12
// PCMPESTRI is slower than normal compare,
// so using it makes sense only if we advance 4+ bytes per compare
RET
avx2:
+#ifndef hasAVX2
CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
+#endif
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
VPBROADCASTB X0, Y1
// Define features that are guaranteed to be supported by the target GOAMD64
// microarchitecture level. If a feature is guaranteed, there's no need to check
// for it at runtime on every call.
+#ifdef GOAMD64_v2
+#define hasPOPCNT
+#define hasSSE42
+#endif
+
#ifdef GOAMD64_v3
+#define hasAVX
#define hasAVX2
+#define hasPOPCNT
+#define hasSSE42
#endif
#ifdef GOAMD64_v4
+#define hasAVX
#define hasAVX2
+#define hasPOPCNT
+#define hasSSE42
#endif
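+
+// The GOAMD64_vN macros are expected to be predefined by the toolchain from the
+// GOAMD64 setting (e.g. building with GOAMD64=v3 defines GOAMD64_v3), so at v3
+// or higher the runtime feature checks guarded by #ifndef hasAVX2 are compiled out.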
fmt.Fprintf(out, "//go:build %s || %sle\n\n", base, base)
}
fmt.Fprintf(out, "#include \"go_asm.h\"\n")
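+ // asm_amd64.h supplies the hasAVX feature macro referenced by the generated amd64 code.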
+ if arch == "amd64" {
+ fmt.Fprintf(out, "#include \"asm_amd64.h\"\n")
+ }
fmt.Fprintf(out, "#include \"textflag.h\"\n\n")
fmt.Fprintf(out, "TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0\n")
}
// Clear the upper bits to get to a clean state. See issue #37174.
// It is safe here because Go code doesn't use the upper bits of Y registers.
p("#ifdef GOOS_darwin")
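+ // When hasAVX is guaranteed by the GOAMD64 level, the runtime check is compiled
+ // out and VZEROUPPER always runs; otherwise non-AVX CPUs skip it at runtime.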
+ p("#ifndef hasAVX")
p("CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0")
p("JE 2(PC)")
+ p("#endif")
p("VZEROUPPER")
p("#endif")
// Code generated by mkpreempt.go; DO NOT EDIT.
#include "go_asm.h"
+#include "asm_amd64.h"
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
MOVQ R14, 96(SP)
MOVQ R15, 104(SP)
#ifdef GOOS_darwin
+ #ifndef hasAVX
CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $0
JE 2(PC)
+ #endif
VZEROUPPER
#endif
MOVUPS X0, 112(SP)