// Misc
(IsZeroVec x) => (SETEQ (VPTEST x x))
+(IsNaNFloat32x4 x) => (VCMPPS128 [3] x x)
+(IsNaNFloat32x8 x) => (VCMPPS256 [3] x x)
+(IsNaNFloat32x16 x) => (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
+(IsNaNFloat64x2 x) => (VCMPPD128 [3] x x)
+(IsNaNFloat64x4 x) => (VCMPPD256 [3] x x)
+(IsNaNFloat64x8 x) => (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
+
// SIMD vector K-masked loads and stores
(LoadMasked64 <t> ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM <types.TypeMask> mask) mem)
// Returns true if arg0 is all zero.
{name: "IsZeroVec", argLength: 1},
+
+ // Returns a mask whose elements indicate whether the corresponding elements of arg0 are NaN.
+ {name: "IsNaNFloat32x4", argLength: 1},
+ {name: "IsNaNFloat32x8", argLength: 1},
+ {name: "IsNaNFloat32x16", argLength: 1},
+ {name: "IsNaNFloat64x2", argLength: 1},
+ {name: "IsNaNFloat64x4", argLength: 1},
+ {name: "IsNaNFloat64x8", argLength: 1},
}
// kind controls successors implicit exit
(InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...)
(InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...)
(InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...)
-(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
-(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
-(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
-(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
-(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
-(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
(LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...)
(LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...)
(LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...)
{name: "InterleaveLoUint16x8", argLength: 2, commutative: false},
{name: "InterleaveLoUint32x4", argLength: 2, commutative: false},
{name: "InterleaveLoUint64x2", argLength: 2, commutative: false},
- {name: "IsNanFloat32x4", argLength: 2, commutative: true},
- {name: "IsNanFloat32x8", argLength: 2, commutative: true},
- {name: "IsNanFloat32x16", argLength: 2, commutative: true},
- {name: "IsNanFloat64x2", argLength: 2, commutative: true},
- {name: "IsNanFloat64x4", argLength: 2, commutative: true},
- {name: "IsNanFloat64x8", argLength: 2, commutative: true},
{name: "LeadingZerosInt32x4", argLength: 1, commutative: false},
{name: "LeadingZerosInt32x8", argLength: 1, commutative: false},
{name: "LeadingZerosInt32x16", argLength: 1, commutative: false},
OpCvtMask64x4to8
OpCvtMask64x8to8
OpIsZeroVec
+ OpIsNaNFloat32x4
+ OpIsNaNFloat32x8
+ OpIsNaNFloat32x16
+ OpIsNaNFloat64x2
+ OpIsNaNFloat64x4
+ OpIsNaNFloat64x8
OpAESDecryptLastRoundUint8x16
OpAESDecryptLastRoundUint8x32
OpAESDecryptLastRoundUint8x64
OpInterleaveLoUint16x8
OpInterleaveLoUint32x4
OpInterleaveLoUint64x2
- OpIsNanFloat32x4
- OpIsNanFloat32x8
- OpIsNanFloat32x16
- OpIsNanFloat64x2
- OpIsNanFloat64x4
- OpIsNanFloat64x8
OpLeadingZerosInt32x4
OpLeadingZerosInt32x8
OpLeadingZerosInt32x16
argLen: 1,
generic: true,
},
+ {
+ name: "IsNaNFloat32x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat32x8",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat32x16",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x2",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x4",
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "IsNaNFloat64x8",
+ argLen: 1,
+ generic: true,
+ },
{
name: "AESDecryptLastRoundUint8x16",
argLen: 2,
argLen: 2,
generic: true,
},
- {
- name: "IsNanFloat32x4",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat32x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat32x16",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x2",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x4",
- argLen: 2,
- commutative: true,
- generic: true,
- },
- {
- name: "IsNanFloat64x8",
- argLen: 2,
- commutative: true,
- generic: true,
- },
{
name: "LeadingZerosInt32x4",
argLen: 1,
return true
case OpIsInBounds:
return rewriteValueAMD64_OpIsInBounds(v)
- case OpIsNanFloat32x16:
- return rewriteValueAMD64_OpIsNanFloat32x16(v)
- case OpIsNanFloat32x4:
- return rewriteValueAMD64_OpIsNanFloat32x4(v)
- case OpIsNanFloat32x8:
- return rewriteValueAMD64_OpIsNanFloat32x8(v)
- case OpIsNanFloat64x2:
- return rewriteValueAMD64_OpIsNanFloat64x2(v)
- case OpIsNanFloat64x4:
- return rewriteValueAMD64_OpIsNanFloat64x4(v)
- case OpIsNanFloat64x8:
- return rewriteValueAMD64_OpIsNanFloat64x8(v)
+ case OpIsNaNFloat32x16:
+ return rewriteValueAMD64_OpIsNaNFloat32x16(v)
+ case OpIsNaNFloat32x4:
+ return rewriteValueAMD64_OpIsNaNFloat32x4(v)
+ case OpIsNaNFloat32x8:
+ return rewriteValueAMD64_OpIsNaNFloat32x8(v)
+ case OpIsNaNFloat64x2:
+ return rewriteValueAMD64_OpIsNaNFloat64x2(v)
+ case OpIsNaNFloat64x4:
+ return rewriteValueAMD64_OpIsNaNFloat64x4(v)
+ case OpIsNaNFloat64x8:
+ return rewriteValueAMD64_OpIsNaNFloat64x8(v)
case OpIsNonNil:
return rewriteValueAMD64_OpIsNonNil(v)
case OpIsSliceInBounds:
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
- // match: (IsNanFloat32x16 x y)
- // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
+ // match: (IsNaNFloat32x16 x)
+ // result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
for {
x := v_0
- y := v_1
v.reset(OpAMD64VPMOVMToVec32x16)
v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
v0.AuxInt = uint8ToAuxInt(3)
- v0.AddArg2(x, y)
+ v0.AddArg2(x, x)
v.AddArg(v0)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat32x4 x y)
- // result: (VCMPPS128 [3] x y)
+ // match: (IsNaNFloat32x4 x)
+ // result: (VCMPPS128 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPS128)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat32x8 x y)
- // result: (VCMPPS256 [3] x y)
+ // match: (IsNaNFloat32x8 x)
+ // result: (VCMPPS256 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPS256)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat64x2 x y)
- // result: (VCMPPD128 [3] x y)
+ // match: (IsNaNFloat64x2 x)
+ // result: (VCMPPD128 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPD128)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool {
v_0 := v.Args[0]
- // match: (IsNanFloat64x4 x y)
- // result: (VCMPPD256 [3] x y)
+ // match: (IsNaNFloat64x4 x)
+ // result: (VCMPPD256 [3] x x)
for {
x := v_0
- y := v_1
v.reset(OpAMD64VCMPPD256)
v.AuxInt = uint8ToAuxInt(3)
- v.AddArg2(x, y)
+ v.AddArg2(x, x)
return true
}
}
-func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
- v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
- // match: (IsNanFloat64x8 x y)
- // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
+ // match: (IsNaNFloat64x8 x)
+ // result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
for {
x := v_0
- y := v_1
v.reset(OpAMD64VPMOVMToVec64x8)
v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
v0.AuxInt = uint8ToAuxInt(3)
- v0.AddArg2(x, y)
+ v0.AddArg2(x, x)
v.AddArg(v0)
return true
}
addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+ addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64)
// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64)
commutative: false
documentation: !string |-
// NAME returns a mask whose elements indicate whether x <= y.
-- go: IsNan # For float only.
- constImm: 3
- commutative: true
- documentation: !string |-
- // NAME checks if elements are NaN. Use as x.IsNan(x).
- go: NotEqual
constImm: 4
commutative: true
- class: mask
# Floats
-- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan
+- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
regexpTag: "compares"
asm: "VCMPP[SD]"
in:
- go: $t
overwriteBase: int
overwriteClass: mask
-- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan)
+- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual)
regexpTag: "compares"
asm: "VCMPP[SD]"
in:
//
// Asm: VPTEST, CPU Feature: AVX
func (x Uint64x4) IsZero() bool
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x4) IsNaN() Mask32x4
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPS, CPU Feature: AVX
+func (x Float32x8) IsNaN() Mask32x8
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) IsNaN() Mask32x16
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) IsNaN() Mask64x2
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) IsNaN() Mask64x4
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) IsNaN() Mask64x8
// Asm: VPUNPCKLQDQ, CPU Feature: AVX512
func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8
-/* IsNan */
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) IsNan(y Float32x4) Mask32x4
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) IsNan(y Float32x8) Mask32x8
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNan(y Float32x16) Mask32x16
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) IsNan(y Float64x2) Mask64x2
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) IsNan(y Float64x4) Mask64x4
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNan(y Float64x8) Mask64x8
-
/* LeadingZeros */
// LeadingZeros counts the leading zeros of each element in x.