p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
+ case ssa.OpAMD64VPTEST:
+ // Flag-setting instructions take their second operand in the destination reg;
+ // the only result is the flags. See also CMP[BWDQ].
+ p := s.Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = simdReg(v.Args[0])
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = simdReg(v.Args[1])
default:
if !ssaGenSIMDValue(s, v) {
(StoreMasked64 {t} ptr mask val mem) && t.Size() == 16 => (VPMASK64store128 ptr mask val mem)
(StoreMasked64 {t} ptr mask val mem) && t.Size() == 32 => (VPMASK64store256 ptr mask val mem)
+// Misc
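+// VPTEST sets ZF when the bitwise AND of its two operands is all zeros, so
+// testing x against itself and reading ZF via SETEQ yields "x is all zeros".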
+(IsZeroVec x) => (SETEQ (VPTEST x x))
+
// SIMD vector K-masked loads and stores
(LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
vloadk = regInfo{inputs: []regMask{gpspsb, mask, 0}, outputs: vonly}
vstorek = regInfo{inputs: []regMask{gpspsb, mask, v, 0}}
- v11 = regInfo{inputs: vzonly, outputs: vonly}
- v21 = regInfo{inputs: []regMask{vz, vz}, outputs: vonly}
- vk = regInfo{inputs: vzonly, outputs: maskonly}
- kv = regInfo{inputs: maskonly, outputs: vonly}
- v2k = regInfo{inputs: []regMask{vz, vz}, outputs: maskonly}
- vkv = regInfo{inputs: []regMask{vz, mask}, outputs: vonly}
- v2kv = regInfo{inputs: []regMask{vz, vz, mask}, outputs: vonly}
- v2kk = regInfo{inputs: []regMask{vz, vz, mask}, outputs: maskonly}
- v31 = regInfo{inputs: []regMask{v, vz, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
- v3kv = regInfo{inputs: []regMask{v, vz, vz, mask}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
- vgpv = regInfo{inputs: []regMask{vz, gp}, outputs: vonly}
- vgp = regInfo{inputs: vonly, outputs: gponly}
- vfpv = regInfo{inputs: []regMask{vz, fp}, outputs: vonly}
- vfpkv = regInfo{inputs: []regMask{vz, fp, mask}, outputs: vonly}
- fpv = regInfo{inputs: []regMask{fp}, outputs: vonly}
- gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
+ v11 = regInfo{inputs: vzonly, outputs: vonly}
+ v21 = regInfo{inputs: []regMask{vz, vz}, outputs: vonly}
+ vk = regInfo{inputs: vzonly, outputs: maskonly}
+ kv = regInfo{inputs: maskonly, outputs: vonly}
+ v2k = regInfo{inputs: []regMask{vz, vz}, outputs: maskonly}
+ vkv = regInfo{inputs: []regMask{vz, mask}, outputs: vonly}
+ v2kv = regInfo{inputs: []regMask{vz, vz, mask}, outputs: vonly}
+ v2kk = regInfo{inputs: []regMask{vz, vz, mask}, outputs: maskonly}
+ v31 = regInfo{inputs: []regMask{v, vz, vz}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
+ v3kv = regInfo{inputs: []regMask{v, vz, vz, mask}, outputs: vonly} // used in resultInArg0 ops, arg0 must not be x15
+ vgpv = regInfo{inputs: []regMask{vz, gp}, outputs: vonly}
+ vgp = regInfo{inputs: vonly, outputs: gponly}
+ vfpv = regInfo{inputs: []regMask{vz, fp}, outputs: vonly}
+ vfpkv = regInfo{inputs: []regMask{vz, fp, mask}, outputs: vonly}
+ fpv = regInfo{inputs: []regMask{fp}, outputs: vonly}
+ gpv = regInfo{inputs: []regMask{gp}, outputs: vonly}
+ v2flags = regInfo{inputs: []regMask{vz, vz}} // two vector inputs, no register output (result in flags)
w11 = regInfo{inputs: wzonly, outputs: wonly}
w21 = regInfo{inputs: []regMask{wz, wz}, outputs: wonly}
{name: "KMOVDi", argLength: 1, reg: kgp, asm: "KMOVD"},
{name: "KMOVWi", argLength: 1, reg: kgp, asm: "KMOVW"},
{name: "KMOVBi", argLength: 1, reg: kgp, asm: "KMOVB"},
+
+ // VPTEST: sets ZF when arg0 AND arg1 is all zeros; the result is left in the flags.
+ {name: "VPTEST", asm: "VPTEST", argLength: 2, reg: v2flags, clobberFlags: true, typ: "Flags"},
}
var AMD64blocks = []blockData{
{name: "CvtMask64x2to8", argLength: 1}, // arg0 = mask
{name: "CvtMask64x4to8", argLength: 1}, // arg0 = mask
{name: "CvtMask64x8to8", argLength: 1}, // arg0 = mask
+
+ // Returns true if arg0 is all zeros.
+ {name: "IsZeroVec", argLength: 1},
}
// kind controls successors implicit exit
OpAMD64KMOVDi
OpAMD64KMOVWi
OpAMD64KMOVBi
+ OpAMD64VPTEST
OpAMD64VADDPD128
OpAMD64VADDPD256
OpAMD64VADDPD512
OpCvtMask64x2to8
OpCvtMask64x4to8
OpCvtMask64x8to8
+ OpIsZeroVec
OpAbsInt8x16
OpAbsInt8x32
OpAbsInt8x64
},
},
},
+ {
+ name: "VPTEST",
+ argLen: 2,
+ clobberFlags: true,
+ asm: x86.AVPTEST,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ },
+ },
{
name: "VADDPD128",
argLen: 2,
argLen: 1,
generic: true,
},
+ {
+ name: "IsZeroVec",
+ argLen: 1,
+ generic: true,
+ },
{
name: "AbsInt8x16",
argLen: 1,
return rewriteValueAMD64_OpIsNonNil(v)
case OpIsSliceInBounds:
return rewriteValueAMD64_OpIsSliceInBounds(v)
+ case OpIsZeroVec:
+ return rewriteValueAMD64_OpIsZeroVec(v)
case OpLeadingZerosInt32x16:
v.Op = OpAMD64VPLZCNTD512
return true
return true
}
}
+func rewriteValueAMD64_OpIsZeroVec(v *Value) bool {
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (IsZeroVec x)
+ // result: (SETEQ (VPTEST x x))
+ for {
+ x := v_0
+ v.reset(OpAMD64SETEQ)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPTEST, types.TypeFlags)
+ v0.AddArg2(x, x)
+ v.AddArg(v0)
+ return true
+ }
+}
func rewriteValueAMD64_OpLeq16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
return nil
},
sys.AMD64)
+ addF(simdPackage, "Int8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint8x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint16x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint32x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint64x2.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Int64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint8x32.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
}
}
//
// Asm: VZEROUPPER, CPU Feature: AVX
func ClearAVXUpperBits()
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int8x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int8x32) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int16x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int16x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int32x4) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int32x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int64x2) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Int64x4) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint8x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint8x32) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint16x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint16x16) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint32x4) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint32x8) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint64x2) IsZero() bool
+
+// IsZero returns true if all elements of x are zeros.
+//
+// This method compiles to VPTEST x, x.
+// x.And(y).IsZero() and x.AndNot(y).IsZero() will be optimized to VPTEST x, y.
+//
+// Asm: VPTEST, CPU Feature: AVX
+func (x Uint64x4) IsZero() bool
}
}
}
+
+func TestIsZero(t *testing.T) {
+ v1 := simd.LoadUint64x2Slice([]uint64{0, 1})
+ v2 := simd.LoadUint64x2Slice([]uint64{0, 0})
+ if v1.IsZero() {
+ t.Errorf("Result incorrect, want false, got true")
+ }
+ if !v2.IsZero() {
+ t.Errorf("Result incorrect, want true, got false")
+ }
+ if !v1.And(v2).IsZero() {
+ t.Errorf("Result incorrect, want true, got false")
+ }
+ if v1.AndNot(v2).IsZero() {
+ t.Errorf("Result incorrect, want false, got true")
+ }
+ if !v2.And(v1).IsZero() {
+ t.Errorf("Result incorrect, want true, got false")
+ }
+ if !v2.AndNot(v1).IsZero() {
+ t.Errorf("Result incorrect, want true, got false")
+ }
+}
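+
+// TestIsZero256 is an illustrative sketch, not part of the original change: it
+// exercises the 256-bit lowering as well. It assumes a LoadUint64x4Slice
+// constructor exists analogously to the LoadUint64x2Slice used above.
+func TestIsZero256(t *testing.T) {
+ w1 := simd.LoadUint64x4Slice([]uint64{0, 0, 0, 1})
+ w2 := simd.LoadUint64x4Slice([]uint64{0, 0, 0, 0})
+ if w1.IsZero() {
+ t.Errorf("Result incorrect, want false, got true")
+ }
+ if !w2.IsZero() {
+ t.Errorf("Result incorrect, want true, got false")
+ }
+}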