"cmd/internal/obj"
"cmd/internal/obj/x86"
"internal/abi"
- "internal/buildcfg"
)
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
case ssa.OpAMD64VZEROUPPER, ssa.OpAMD64VZEROALL:
s.Prog(v.Op.Asm())
- case ssa.OpAMD64Zero128, ssa.OpAMD64Zero256, ssa.OpAMD64Zero512: // no code emitted
+ case ssa.OpAMD64Zero128: // no code emitted
+
+ case ssa.OpAMD64Zero256, ssa.OpAMD64Zero512:
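+ // XORing a register with itself yields zero; this emits, e.g., VPXOR Y0, Y0, Y0 for Zero256 or VPXORQ Z0, Z0, Z0 for Zero512.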
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = simdReg(v)
+ p.AddRestSourceReg(simdReg(v))
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = simdReg(v)
case ssa.OpAMD64VMOVSSf2v, ssa.OpAMD64VMOVSDf2v:
- // These are for initializing the least 32/64 bits of a SIMD register from a "float".
+ // These are for initializing the least significant 32/64 bits of a SIMD register from a "float".
- // zeroX15 zeroes the X15 register.
+ // zeroX15 zeroes the low 128 bits of the X15 register.
func zeroX15(s *ssagen.State) {
- if !buildcfg.Experiment.SIMD {
- opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
- return
- }
- vxorps := func(s *ssagen.State) {
- p := s.Prog(x86.AVXORPS)
- p.From.Type = obj.TYPE_REG
- p.From.Reg = x86.REG_X15
- p.AddRestSourceReg(x86.REG_X15)
- p.To.Type = obj.TYPE_REG
- p.To.Reg = x86.REG_X15
- }
- if buildcfg.GOAMD64 >= 3 {
- vxorps(s)
- return
- }
- // AVX may not be available, check before zeroing the high bits.
- p := s.Prog(x86.ACMPB)
- p.From.Type = obj.TYPE_MEM
- p.From.Name = obj.NAME_EXTERN
- p.From.Sym = ir.Syms.X86HasAVX
- p.To.Type = obj.TYPE_CONST
- p.To.Offset = 1
- jmp := s.Prog(x86.AJNE)
- jmp.To.Type = obj.TYPE_BRANCH
- vxorps(s)
- sse := opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
- jmp.To.SetTarget(sse)
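+ // Zero256/Zero512 now materialize wide zeros with VPXOR/VPXORQ directly, so only the low 128 bits of X15 must be zero and plain XORPS suffices; no AVX check is needed.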
+ opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
// Example instruction: VRSQRTPS X1, X1
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
+ v01 = regInfo{inputs: nil, outputs: vonly} // no inputs, one vector output (used by Zero256)
v11 = regInfo{inputs: vonly, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
v21 = regInfo{inputs: []regMask{v, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
vk = regInfo{inputs: vzonly, outputs: maskonly}
gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
v2flags = regInfo{inputs: []regMask{vz, vz}}
+ w01 = regInfo{inputs: nil, outputs: wonly} // no inputs, one vector output (used by Zero512)
w11 = regInfo{inputs: wonly, outputs: wonly} // used in resultInArg0 ops, arg0 must not be x15
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
wk = regInfo{inputs: wzonly, outputs: maskonly}
{name: "VPMOVVec64x4ToM", argLength: 1, reg: vk, asm: "VPMOVQ2M"},
{name: "VPMOVVec64x8ToM", argLength: 1, reg: wk, asm: "VPMOVQ2M"},
+ // X15 is the zero register for widths up to 128 bits. For wider vectors, we zero a register on the fly.
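+ // Zero256 and Zero512 lower to a self-XOR (VPXOR/VPXORQ) in ssaGenValue rather than reading X15.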
{name: "Zero128", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
- {name: "Zero256", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
- {name: "Zero512", argLength: 0, reg: x15only, zeroWidth: true, fixedReg: true},
+ {name: "Zero256", argLength: 0, reg: v01, asm: "VPXOR"},
+ {name: "Zero512", argLength: 0, reg: w01, asm: "VPXORQ"},
+ // Move a 32/64-bit float to a 128-bit SIMD register.
{name: "VMOVSDf2v", argLength: 1, reg: fpv, asm: "VMOVSD"},
{name: "VMOVSSf2v", argLength: 1, reg: fpv, asm: "VMOVSS"},
+
{name: "VMOVQ", argLength: 1, reg: gpv, asm: "VMOVQ"},
{name: "VMOVD", argLength: 1, reg: gpv, asm: "VMOVD"},
},
},
{
- name: "Zero256",
- argLen: 0,
- zeroWidth: true,
- fixedReg: true,
+ name: "Zero256",
+ argLen: 0,
+ asm: x86.AVPXOR,
reg: regInfo{
outputs: []outputInfo{
- {0, 2147483648}, // X15
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
{
- name: "Zero512",
- argLen: 0,
- zeroWidth: true,
- fixedReg: true,
+ name: "Zero512",
+ argLen: 0,
+ asm: x86.AVPXORQ,
reg: regInfo{
outputs: []outputInfo{
- {0, 2147483648}, // X15
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
},
},
// there's no need to handle that. Clear R14 so that there's
// a bad value in there, in case needm tries to use it.
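+// Only the low 128 bits of X15 (the zero register) need to be zeroed;
+// wider zeros are now materialized by the compiler with VPXOR/VPXORQ.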
XORPS X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
XORQ R14, R14
MOVQ $runtime·needAndBindM<ABIInternal>(SB), AX
CALL AX
get_tls(R14)
MOVQ g(R14), R14
XORPS X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
JMP ·sigpanic<ABIInternal>(SB)
// gcWriteBarrier informs the GC about heap pointer writes.
// Back to Go world, set special registers.
// The g register (R14) is preserved in C.
XORPS X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
RET
// C->Go callback thunk that allows to call runtime·racesymbolize from C code.
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
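+// As in asm_amd64.s, only the low 128 bits of X15 need to be zero,
+// so the AVX-gated VXORPS is no longer required.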
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
get_tls(R12)
MOVQ g(R12), R14
PXOR X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
// Reserve space for spill slots.
NOP SP // disable vet stack checking
// R14 is cleared in case there's a non-zero value in there
// if called from a non-go thread.
XORPS X15, X15
-#ifdef GOEXPERIMENT_simd
- CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
- JNE 2(PC)
- VXORPS X15, X15, X15
-#endif
XORQ R14, R14
get_tls(AX)