From: Cherry Mui
Date: Wed, 31 Dec 2025 07:42:30 +0000 (-0500)
Subject: simd/archsimd: make IsNaN unary
X-Git-Tag: go1.26rc2~7^2~7
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=13440fb51831bfde5804430596d2045a64fd2209;p=gostls13.git

simd/archsimd: make IsNaN unary

Currently, the IsNan API is defined as x.IsNan(y), which returns a
mask indicating, for each element, whether either x or y is NaN.
Although this is closer to the machine instruction, it is an awkward
API, as IsNaN is inherently a unary operation.

This CL changes it to unary, x.IsNaN(), which compiles to
VCMPPS $3, x, x (or VCMPPD). For the two-operand case, we can
optimize x.IsNaN().Or(y.IsNaN()) to VCMPPS $3, x, y (not done in
this CL).

While here, change the name to IsNaN (with both Ns uppercase),
matching math.IsNaN.

Tests are in the next CL.

Change-Id: Ib6e7afc2635e6c3c606db5ea16420ee673a6c6d6
Reviewed-on: https://go-review.googlesource.com/c/go/+/733660
Reviewed-by: David Chase
LUCI-TryBot-Result: Go LUCI
---

diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 38ca44f7eb..9c54186854 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1730,6 +1730,13 @@
 
 // Misc
 (IsZeroVec x) => (SETEQ (VPTEST x x))
 
+(IsNaNFloat32x4 x) => (VCMPPS128 [3] x x)
+(IsNaNFloat32x8 x) => (VCMPPS256 [3] x x)
+(IsNaNFloat32x16 x) => (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
+(IsNaNFloat64x2 x) => (VCMPPD128 [3] x x)
+(IsNaNFloat64x4 x) => (VCMPPD256 [3] x x)
+(IsNaNFloat64x8 x) => (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
+
 // SIMD vector K-masked loads and stores
 (LoadMasked64 ptr mask mem) && t.Size() == 64 => (VPMASK64load512 ptr (VPMOVVec64x8ToM mask) mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 8637133e5f..85bde1aab2 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -715,6 +715,14 @@ var genericOps = []opData{
 
 	// Returns true if arg0 is all zero.
 	{name: "IsZeroVec", argLength: 1},
+
+	// Returns a mask indicating whether arg0's elements are NaN.
+	{name: "IsNaNFloat32x4", argLength: 1},
+	{name: "IsNaNFloat32x8", argLength: 1},
+	{name: "IsNaNFloat32x16", argLength: 1},
+	{name: "IsNaNFloat64x2", argLength: 1},
+	{name: "IsNaNFloat64x4", argLength: 1},
+	{name: "IsNaNFloat64x8", argLength: 1},
 }
 
 //     kind          controls        successors      implicit exit
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index 8a5b70da30..5c83f39a1f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -559,12 +559,6 @@
 (InterleaveLoGroupedUint32x16 ...) => (VPUNPCKLDQ512 ...)
 (InterleaveLoGroupedUint64x4 ...) => (VPUNPCKLQDQ256 ...)
 (InterleaveLoGroupedUint64x8 ...) => (VPUNPCKLQDQ512 ...)
-(IsNanFloat32x4 x y) => (VCMPPS128 [3] x y)
-(IsNanFloat32x8 x y) => (VCMPPS256 [3] x y)
-(IsNanFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
-(IsNanFloat64x2 x y) => (VCMPPD128 [3] x y)
-(IsNanFloat64x4 x y) => (VCMPPD256 [3] x y)
-(IsNanFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
 (LeadingZerosInt32x4 ...) => (VPLZCNTD128 ...)
 (LeadingZerosInt32x8 ...) => (VPLZCNTD256 ...)
 (LeadingZerosInt32x16 ...) => (VPLZCNTD512 ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index af1007cd54..889ab0d84f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -519,12 +519,6 @@ func simdGenericOps() []opData {
 		{name: "InterleaveLoUint16x8", argLength: 2, commutative: false},
 		{name: "InterleaveLoUint32x4", argLength: 2, commutative: false},
 		{name: "InterleaveLoUint64x2", argLength: 2, commutative: false},
-		{name: "IsNanFloat32x4", argLength: 2, commutative: true},
-		{name: "IsNanFloat32x8", argLength: 2, commutative: true},
-		{name: "IsNanFloat32x16", argLength: 2, commutative: true},
-		{name: "IsNanFloat64x2", argLength: 2, commutative: true},
-		{name: "IsNanFloat64x4", argLength: 2, commutative: true},
-		{name: "IsNanFloat64x8", argLength: 2, commutative: true},
 		{name: "LeadingZerosInt32x4", argLength: 1, commutative: false},
 		{name: "LeadingZerosInt32x8", argLength: 1, commutative: false},
 		{name: "LeadingZerosInt32x16", argLength: 1, commutative: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d2ba15f740..abaf7911d4 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -6158,6 +6158,12 @@
 	OpCvtMask64x4to8
 	OpCvtMask64x8to8
 	OpIsZeroVec
+	OpIsNaNFloat32x4
+	OpIsNaNFloat32x8
+	OpIsNaNFloat32x16
+	OpIsNaNFloat64x2
+	OpIsNaNFloat64x4
+	OpIsNaNFloat64x8
 	OpAESDecryptLastRoundUint8x16
 	OpAESDecryptLastRoundUint8x32
 	OpAESDecryptLastRoundUint8x64
@@ -6673,12 +6679,6 @@
 	OpInterleaveLoUint16x8
 	OpInterleaveLoUint32x4
 	OpInterleaveLoUint64x2
-	OpIsNanFloat32x4
-	OpIsNanFloat32x8
-	OpIsNanFloat32x16
-	OpIsNanFloat64x2
-	OpIsNanFloat64x4
-	OpIsNanFloat64x8
 	OpLeadingZerosInt32x4
 	OpLeadingZerosInt32x8
 	OpLeadingZerosInt32x16
@@ -88993,6 +88993,36 @@ var opcodeTable = [...]opInfo{
 		argLen:  1,
 		generic: true,
 	},
+	{
+		name:    "IsNaNFloat32x4",
+		argLen:  1,
+		generic: true,
+	},
+	{
+		name:    "IsNaNFloat32x8",
+		argLen:  1,
+		generic: true,
+	},
+	{
+		name:    "IsNaNFloat32x16",
+		argLen:  1,
+		generic: true,
+	},
+	{
+		name:    "IsNaNFloat64x2",
+		argLen:  1,
+		generic: true,
+	},
+	{
+		name:    "IsNaNFloat64x4",
+		argLen:  1,
+		generic: true,
+	},
+	{
+		name:    "IsNaNFloat64x8",
+		argLen:  1,
+		generic: true,
+	},
 	{
 		name:   "AESDecryptLastRoundUint8x16",
 		argLen: 2,
@@ -91670,42 +91700,6 @@ var opcodeTable = [...]opInfo{
 		argLen:  2,
 		generic: true,
 	},
-	{
-		name:        "IsNanFloat32x4",
-		argLen:      2,
-		commutative: true,
-		generic:     true,
-	},
-	{
-		name:        "IsNanFloat32x8",
-		argLen:      2,
-		commutative: true,
-		generic:     true,
-	},
-	{
-		name:        "IsNanFloat32x16",
-		argLen:      2,
-		commutative: true,
-		generic:     true,
-	},
-	{
-		name:        "IsNanFloat64x2",
-		argLen:      2,
-		commutative: true,
-		generic:     true,
-	},
-	{
-		name:        "IsNanFloat64x4",
-		argLen:      2,
-		commutative: true,
-		generic:     true,
-	},
-	{
-		name:        "IsNanFloat64x8",
-		argLen:      2,
-		commutative: true,
-		generic:     true,
-	},
 	{
 		name:    "LeadingZerosInt32x4",
 		argLen:  1,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 0c04410074..0b2bb74ce4 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -3773,18 +3773,18 @@ func rewriteValueAMD64(v *Value) bool {
 		return true
 	case OpIsInBounds:
 		return rewriteValueAMD64_OpIsInBounds(v)
-	case OpIsNanFloat32x16:
-		return rewriteValueAMD64_OpIsNanFloat32x16(v)
-	case OpIsNanFloat32x4:
-		return rewriteValueAMD64_OpIsNanFloat32x4(v)
-	case OpIsNanFloat32x8:
-		return rewriteValueAMD64_OpIsNanFloat32x8(v)
-	case OpIsNanFloat64x2:
-		return rewriteValueAMD64_OpIsNanFloat64x2(v)
-	case OpIsNanFloat64x4:
-		return rewriteValueAMD64_OpIsNanFloat64x4(v)
-	case OpIsNanFloat64x8:
-		return rewriteValueAMD64_OpIsNanFloat64x8(v)
+	case OpIsNaNFloat32x16:
+		return rewriteValueAMD64_OpIsNaNFloat32x16(v)
+	case OpIsNaNFloat32x4:
+		return rewriteValueAMD64_OpIsNaNFloat32x4(v)
+	case OpIsNaNFloat32x8:
+		return rewriteValueAMD64_OpIsNaNFloat32x8(v)
+	case OpIsNaNFloat64x2:
+		return rewriteValueAMD64_OpIsNaNFloat64x2(v)
+	case OpIsNaNFloat64x4:
+		return rewriteValueAMD64_OpIsNaNFloat64x4(v)
+	case OpIsNaNFloat64x8:
+		return rewriteValueAMD64_OpIsNaNFloat64x8(v)
 	case OpIsNonNil:
 		return rewriteValueAMD64_OpIsNonNil(v)
 	case OpIsSliceInBounds:
@@ -70957,94 +70957,82 @@ func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpIsNanFloat32x16(v *Value) bool {
-	v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x16(v *Value) bool {
 	v_0 := v.Args[0]
 	b := v.Block
 	typ := &b.Func.Config.Types
-	// match: (IsNanFloat32x16 x y)
-	// result: (VPMOVMToVec32x16 (VCMPPS512 [3] x y))
+	// match: (IsNaNFloat32x16 x)
+	// result: (VPMOVMToVec32x16 (VCMPPS512 [3] x x))
 	for {
 		x := v_0
-		y := v_1
 		v.reset(OpAMD64VPMOVMToVec32x16)
 		v0 := b.NewValue0(v.Pos, OpAMD64VCMPPS512, typ.Mask)
 		v0.AuxInt = uint8ToAuxInt(3)
-		v0.AddArg2(x, y)
+		v0.AddArg2(x, x)
 		v.AddArg(v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpIsNanFloat32x4(v *Value) bool {
-	v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x4(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (IsNanFloat32x4 x y)
-	// result: (VCMPPS128 [3] x y)
+	// match: (IsNaNFloat32x4 x)
+	// result: (VCMPPS128 [3] x x)
 	for {
 		x := v_0
-		y := v_1
 		v.reset(OpAMD64VCMPPS128)
 		v.AuxInt = uint8ToAuxInt(3)
-		v.AddArg2(x, y)
+		v.AddArg2(x, x)
 		return true
 	}
 }
-func rewriteValueAMD64_OpIsNanFloat32x8(v *Value) bool {
-	v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat32x8(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (IsNanFloat32x8 x y)
-	// result: (VCMPPS256 [3] x y)
+	// match: (IsNaNFloat32x8 x)
+	// result: (VCMPPS256 [3] x x)
 	for {
 		x := v_0
-		y := v_1
 		v.reset(OpAMD64VCMPPS256)
 		v.AuxInt = uint8ToAuxInt(3)
-		v.AddArg2(x, y)
+		v.AddArg2(x, x)
 		return true
 	}
 }
-func rewriteValueAMD64_OpIsNanFloat64x2(v *Value) bool {
-	v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x2(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (IsNanFloat64x2 x y)
-	// result: (VCMPPD128 [3] x y)
+	// match: (IsNaNFloat64x2 x)
+	// result: (VCMPPD128 [3] x x)
 	for {
 		x := v_0
-		y := v_1
 		v.reset(OpAMD64VCMPPD128)
 		v.AuxInt = uint8ToAuxInt(3)
-		v.AddArg2(x, y)
+		v.AddArg2(x, x)
 		return true
 	}
 }
-func rewriteValueAMD64_OpIsNanFloat64x4(v *Value) bool {
-	v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x4(v *Value) bool {
 	v_0 := v.Args[0]
-	// match: (IsNanFloat64x4 x y)
-	// result: (VCMPPD256 [3] x y)
+	// match: (IsNaNFloat64x4 x)
+	// result: (VCMPPD256 [3] x x)
 	for {
 		x := v_0
-		y := v_1
 		v.reset(OpAMD64VCMPPD256)
 		v.AuxInt = uint8ToAuxInt(3)
-		v.AddArg2(x, y)
+		v.AddArg2(x, x)
 		return true
 	}
 }
-func rewriteValueAMD64_OpIsNanFloat64x8(v *Value) bool {
-	v_1 := v.Args[1]
+func rewriteValueAMD64_OpIsNaNFloat64x8(v *Value) bool {
 	v_0 := v.Args[0]
 	b := v.Block
 	typ := &b.Func.Config.Types
-	// match: (IsNanFloat64x8 x y)
-	// result: (VPMOVMToVec64x8 (VCMPPD512 [3] x y))
+	// match: (IsNaNFloat64x8 x)
+	// result: (VPMOVMToVec64x8 (VCMPPD512 [3] x x))
 	for {
 		x := v_0
-		y := v_1
 		v.reset(OpAMD64VPMOVMToVec64x8)
 		v0 := b.NewValue0(v.Pos, OpAMD64VCMPPD512, typ.Mask)
 		v0.AuxInt = uint8ToAuxInt(3)
-		v0.AddArg2(x, y)
+		v0.AddArg2(x, x)
 		v.AddArg(v0)
 		return true
 	}
 }
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 4425c5617b..e2eebd783d 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1667,6 +1667,12 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
 	addF(simdPackage, "Uint16x16.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
 	addF(simdPackage, "Uint32x8.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
 	addF(simdPackage, "Uint64x4.IsZero", opLen1(ssa.OpIsZeroVec, types.Types[types.TBOOL]), sys.AMD64)
+	addF(simdPackage, "Float32x4.IsNaN", opLen1(ssa.OpIsNaNFloat32x4, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float32x8.IsNaN", opLen1(ssa.OpIsNaNFloat32x8, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float32x16.IsNaN", opLen1(ssa.OpIsNaNFloat32x16, types.TypeVec512), sys.AMD64)
+	addF(simdPackage, "Float64x2.IsNaN", opLen1(ssa.OpIsNaNFloat64x2, types.TypeVec128), sys.AMD64)
+	addF(simdPackage, "Float64x4.IsNaN", opLen1(ssa.OpIsNaNFloat64x4, types.TypeVec256), sys.AMD64)
+	addF(simdPackage, "Float64x8.IsNaN", opLen1(ssa.OpIsNaNFloat64x8, types.TypeVec512), sys.AMD64)
 
 	// sfp4 is intrinsic-if-constant, but otherwise it's complicated enough to just implement in Go.
 	sfp4 := func(method string, hwop ssa.Op, vectype *types.Type) {
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 5a95761228..4ad0c6032c 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -571,12 +571,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...
addF(simdPackage, "Uint32x16.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint64x4.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.InterleaveLoGrouped", opLen2(ssa.OpInterleaveLoGroupedUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.LeadingZeros", opLen1(ssa.OpLeadingZerosInt32x16, types.TypeVec512), sys.AMD64) diff --git a/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml index eb1d8161c3..97ee587503 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Compares/categories.yaml @@ -21,11 +21,6 @@ commutative: false documentation: !string |- // NAME returns a mask whose elements indicate whether x <= y. -- go: IsNan # For float only. - constImm: 3 - commutative: true - documentation: !string |- - // NAME checks if elements are NaN. Use as x.IsNan(x). - go: NotEqual constImm: 4 commutative: true diff --git a/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml index 3f6c8a45b6..6dbfb57343 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Compares/go.yaml @@ -121,7 +121,7 @@ - class: mask # Floats -- go: Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) regexpTag: "compares" asm: "VCMPP[SD]" in: @@ -135,7 +135,7 @@ - go: $t overwriteBase: int overwriteClass: mask -- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual|IsNan) +- go: (Equal|Greater|Less|LessEqual|GreaterEqual|NotEqual) regexpTag: "compares" asm: "VCMPP[SD]" in: diff --git a/src/simd/archsimd/extra_amd64.go b/src/simd/archsimd/extra_amd64.go index cd5a3230b9..b0dba6d234 100644 --- a/src/simd/archsimd/extra_amd64.go +++ b/src/simd/archsimd/extra_amd64.go @@ -143,3 +143,39 @@ func (x Uint64x2) IsZero() bool // // Asm: VPTEST, CPU Feature: AVX func (x Uint64x4) IsZero() bool + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) IsNaN() Mask32x4 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) IsNaN() Mask32x8 + +// IsNaN returns a mask whose elements indicate whether the corresponding +// elements of x are NaN. 
+//
+// Asm: VCMPPS, CPU Feature: AVX512
+func (x Float32x16) IsNaN() Mask32x16
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x2) IsNaN() Mask64x2
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX
+func (x Float64x4) IsNaN() Mask64x4
+
+// IsNaN returns a mask whose elements indicate whether the corresponding
+// elements of x are NaN.
+//
+// Asm: VCMPPD, CPU Feature: AVX512
+func (x Float64x8) IsNaN() Mask64x8
diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go
index 95d417b3df..eba340c793 100644
--- a/src/simd/archsimd/ops_amd64.go
+++ b/src/simd/archsimd/ops_amd64.go
@@ -3446,38 +3446,6 @@ func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4
 // Asm: VPUNPCKLQDQ, CPU Feature: AVX512
 func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8
 
-/* IsNan */
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x4) IsNan(y Float32x4) Mask32x4
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX
-func (x Float32x8) IsNan(y Float32x8) Mask32x8
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPS, CPU Feature: AVX512
-func (x Float32x16) IsNan(y Float32x16) Mask32x16
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x2) IsNan(y Float64x2) Mask64x2
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX
-func (x Float64x4) IsNan(y Float64x4) Mask64x4
-
-// IsNan checks if elements are NaN. Use as x.IsNan(x).
-//
-// Asm: VCMPPD, CPU Feature: AVX512
-func (x Float64x8) IsNan(y Float64x8) Mask64x8
-
 /* LeadingZeros */
 
 // LeadingZeros counts the leading zeros of each element in x.
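
Aside: with the unary API, callers that want the old two-operand
behavior can Or two masks together, as the commit message notes. A
minimal sketch, assuming a package built with the simd GOEXPERIMENT
on amd64; the helper name eitherNaN is illustrative, not part of the
CL:

	package p

	import "simd/archsimd"

	// eitherNaN reproduces the old binary x.IsNan(y) semantics using
	// the new unary API: the result mask is set in each lane where x
	// or y is NaN. A later compiler optimization may fuse the two
	// compares into a single VCMPPS $3, x, y, per the commit message.
	func eitherNaN(x, y archsimd.Float32x4) archsimd.Mask32x4 {
		return x.IsNaN().Or(y.IsNaN())
	}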