// Offsets into internal/cpu records for use in assembly.
const (
- x86_HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
- x86_HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
- x86_HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
- x86_HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
- s390x_HasVX = unsafe.Offsetof(cpu.S390X.HasVX)
+ offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
+ offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
+ offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
+ offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)
+
+ offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX)
)
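// Aside: a minimal, self-contained sketch of what these constants are and how
// the assembly consumes them. unsafe.Offsetof yields the byte offset of a
// feature flag within its record, and the generated go_asm.h header exposes
// each Go constant to the package's .s files under a "const_" prefix, which is
// why the assembly below reads internal∕cpu·X86+const_offsetX86HasAVX2(SB).
// The struct here is a hypothetical stand-in, not the real internal/cpu layout.

package main

import (
	"fmt"
	"unsafe"
)

// featureFlags loosely mimics an internal/cpu feature record.
type featureFlags struct {
	_         [64]byte // leading padding; internal/cpu uses cache-line pads
	HasAVX2   bool
	HasPOPCNT bool
}

const (
	offsetHasAVX2   = unsafe.Offsetof(featureFlags{}.HasAVX2)   // 64
	offsetHasPOPCNT = unsafe.Offsetof(featureFlags{}.HasPOPCNT) // 65
)

func main() {
	// A feature check in assembly is a one-byte load and compare at
	// base+offset, e.g. CMPB symbol+const_offsetHasAVX2(SB), $1.
	fmt.Println(offsetHasAVX2, offsetHasPOPCNT)
}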
// MaxLen is the maximum length of the string to be searched for (argument b) in Index.
CMPQ R8, $63
JBE loop
- CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JEQ big_loop_avx2
JMP big_loop
loop:
#include "textflag.h"
TEXT ·Count(SB),NOSPLIT,$0-40
- CMPB internal∕cpu·X86+const_x86_HasPOPCNT(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGeneric(SB)
MOVQ b_base+0(FP), SI
JMP countbody<>(SB)
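// For reference, the POPCNT check above amounts to the following Go-level
// dispatch (a sketch under assumptions: hasPOPCNT stands in for the
// internal/cpu flag byte that CMPB reads, countPOPCNT is a hypothetical name
// for the assembly fast path, and countGeneric is the portable fallback that
// the JMP targets).

package main

import "fmt"

var hasPOPCNT bool // stand-in for internal/cpu's X86.HasPOPCNT

// countGeneric is a simplified portable fallback.
func countGeneric(b []byte, c byte) int {
	n := 0
	for _, x := range b {
		if x == c {
			n++
		}
	}
	return n
}

// countPOPCNT is a placeholder for the vectorized, POPCNT-based fast path.
func countPOPCNT(b []byte, c byte) int { return countGeneric(b, c) }

// Count mirrors the assembly dispatch: fall back when POPCNT is unavailable.
func Count(b []byte, c byte) int {
	if !hasPOPCNT {
		return countGeneric(b, c)
	}
	return countPOPCNT(b, c)
}

func main() {
	fmt.Println(Count([]byte("abracadabra"), 'a')) // 5
}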
TEXT ·CountString(SB),NOSPLIT,$0-32
- CMPB internal∕cpu·X86+const_x86_HasPOPCNT(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasPOPCNT(SB), $1
JEQ 2(PC)
JMP ·countGenericString(SB)
MOVQ s_base+0(FP), SI
RET
avx2:
- CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
hugeloop:
CMPL BX, $64
JB bigloop
- CMPB internal∕cpu·X86+const_x86_HasSSE2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
JNE bigloop
MOVOU (SI), X0
MOVOU (DI), X1
JB small
CMPQ BX, $64
JB bigloop
- CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE hugeloop_avx2
// 64 bytes at a time using xmm registers
VZEROUPPER
JMP success
sse42:
- CMPB internal∕cpu·X86+const_x86_HasSSE42(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE42(SB), $1
JNE no_sse42
CMPQ AX, $12
// PCMPESTRI is slower than normal compare,
// so using it only makes sense if we advance several bytes per compare.
RET
avx2:
- CMPB internal∕cpu·X86+const_x86_HasAVX2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JNE sse
MOVD AX, X0
LEAQ -32(SI)(BX*1), R11
RET
large:
- MOVBZ internal∕cpu·S390X+const_s390x_HasVX(SB), R1
+ MOVBZ internal∕cpu·S390X+const_offsetS390xHasVX(SB), R1
CMPBNE R1, $0, vectorimpl
srstimpl: // no vector facility
// func cputicks() int64
TEXT runtime·cputicks(SB),NOSPLIT,$0-8
- CMPB internal∕cpu·X86+const_offset_x86_HasSSE2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
JNE done
CMPB runtime·lfenceBeforeRdtsc(SB), $1
JNE mfence
// Offsets into internal/cpu records for use in assembly.
const (
- offset_x86_HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
- offset_x86_HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
- offset_x86_HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
+ offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
+ offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
+ offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
+
- offset_arm_HasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
+ offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)
)
JBE _5through8
CMPL BX, $16
JBE _9through16
- CMPB internal∕cpu·X86+const_offset_x86_HasSSE2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
JNE nosse2
PXOR X0, X0
CMPL BX, $32
JBE _65through128
CMPQ BX, $256
JBE _129through256
- CMPB internal∕cpu·X86+const_offset_x86_HasAVX2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasAVX2(SB), $1
JE loop_preheader_avx2
// TODO: for really big clears, use MOVNTDQ, even without AVX2.
JBE move_5through8
CMPL BX, $16
JBE move_9through16
- CMPB internal∕cpu·X86+const_offset_x86_HasSSE2(SB), $1
+ CMPB internal∕cpu·X86+const_offsetX86HasSSE2(SB), $1
JNE nosse2
CMPL BX, $32
JBE move_17through32
*/
forward:
// If REP MOVSB isn't fast, don't use it
- CMPB internal∕cpu·X86+const_offset_x86_HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
+ CMPB internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
JNE fwdBy4
// Check alignment
JLS move_256through2048
// If REP MOVSB isn't fast, don't use it
- CMPB internal∕cpu·X86+const_offset_x86_HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
+ CMPB internal∕cpu·X86+const_offsetX86HasERMS(SB), $1 // enhanced REP MOVSB/STOSB
JNE fwdBy8
// Check alignment
// the RET instruction will clobber R12 on nacl, and the compiler's register
// allocator needs to know.
TEXT runtime·udiv(SB),NOSPLIT|NOFRAME,$0
- MOVBU internal∕cpu·ARM+const_offset_arm_HasIDIVA(SB), Ra
+ MOVBU internal∕cpu·ARM+const_offsetARMHasIDIVA(SB), Ra
CMP $0, Ra
BNE udiv_hardware