From: Cherry Mui
Date: Mon, 29 Dec 2025 16:30:33 +0000 (-0500)
Subject: simd/archsimd: remove redundant suffix of ExtendLo operations
X-Git-Tag: go1.26rc2~7^2~19
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=76dddce29302681bdddafd4cbc27db66802414dd;p=gostls13.git

simd/archsimd: remove redundant suffix of ExtendLo operations

For methods like ExtendLo2ToInt64x2, the trailing "x2" is redundant:
the element count is already given by "Lo2". Remove it, so the method
is just ExtendLo2ToInt64.

Change-Id: I490afd818c40bb7a4ef15c249723895735bd6488
Reviewed-on: https://go-review.googlesource.com/c/go/+/733100
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---

diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index a7a6a3f7a1..8a5b70da30 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -376,26 +376,26 @@
 (ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM mask))
 (ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM mask))
 (ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM mask))
-(ExtendLo2ToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...)
-(ExtendLo2ToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...)
-(ExtendLo2ToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...)
-(ExtendLo2ToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...)
-(ExtendLo2ToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...)
-(ExtendLo2ToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...)
-(ExtendLo4ToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...)
-(ExtendLo4ToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...)
-(ExtendLo4ToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...)
-(ExtendLo4ToInt64x4Int16x8 ...) => (VPMOVSXWQ256 ...)
-(ExtendLo4ToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...)
-(ExtendLo4ToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
-(ExtendLo4ToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...)
-(ExtendLo4ToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...)
-(ExtendLo8ToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
-(ExtendLo8ToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...)
-(ExtendLo8ToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...)
-(ExtendLo8ToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
-(ExtendLo8ToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...)
-(ExtendLo8ToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...)
+(ExtendLo2ToInt64Int8x16 ...) => (VPMOVSXBQ128 ...)
+(ExtendLo2ToInt64Int16x8 ...) => (VPMOVSXWQ128 ...)
+(ExtendLo2ToInt64Int32x4 ...) => (VPMOVSXDQ128 ...)
+(ExtendLo2ToUint64Uint8x16 ...) => (VPMOVZXBQ128 ...)
+(ExtendLo2ToUint64Uint16x8 ...) => (VPMOVZXWQ128 ...)
+(ExtendLo2ToUint64Uint32x4 ...) => (VPMOVZXDQ128 ...)
+(ExtendLo4ToInt32Int8x16 ...) => (VPMOVSXBD128 ...)
+(ExtendLo4ToInt32Int16x8 ...) => (VPMOVSXWD128 ...)
+(ExtendLo4ToInt64Int8x16 ...) => (VPMOVSXBQ256 ...)
+(ExtendLo4ToInt64Int16x8 ...) => (VPMOVSXWQ256 ...)
+(ExtendLo4ToUint32Uint8x16 ...) => (VPMOVZXBD128 ...)
+(ExtendLo4ToUint32Uint16x8 ...) => (VPMOVZXWD128 ...)
+(ExtendLo4ToUint64Uint8x16 ...) => (VPMOVZXBQ256 ...)
+(ExtendLo4ToUint64Uint16x8 ...) => (VPMOVZXWQ256 ...)
+(ExtendLo8ToInt16Int8x16 ...) => (VPMOVSXBW128 ...)
+(ExtendLo8ToInt32Int8x16 ...) => (VPMOVSXBD256 ...)
+(ExtendLo8ToInt64Int8x16 ...) => (VPMOVSXBQ512 ...)
+(ExtendLo8ToUint16Uint8x16 ...) => (VPMOVZXBW128 ...)
+(ExtendLo8ToUint32Uint8x16 ...) => (VPMOVZXBD256 ...)
+(ExtendLo8ToUint64Uint8x16 ...) => (VPMOVZXBQ512 ...)
 (ExtendToInt16Int8x16 ...) => (VPMOVSXBW256 ...)
 (ExtendToInt16Int8x32 ...) => (VPMOVSXBW512 ...)
 (ExtendToInt32Int8x16 ...) => (VPMOVSXBD512 ...)
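As an illustration of the rename (not part of the change itself), a minimal sketch of a call site using the new spelling follows; the import path and build constraint are assumptions inferred from the file paths above, and widenLo2 is a hypothetical helper. The method signature itself matches the one declared in ops_amd64.go below.

//go:build goexperiment.simd && amd64

package example

import "simd/archsimd"

// widenLo2 uses the renamed method: formerly x.ExtendLo2ToInt64x2(),
// now x.ExtendLo2ToInt64(), since "Lo2" already states that the result
// has two elements. It sign-extends the two lowest int8 lanes of x to
// int64, producing an Int64x2.
func widenLo2(x archsimd.Int8x16) archsimd.Int64x2 {
	return x.ExtendLo2ToInt64()
}
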
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index fd43f3d61f..af1007cd54 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -364,26 +364,26 @@ func simdGenericOps() []opData {
 	{name: "ExpandUint64x2", argLength: 2, commutative: false},
 	{name: "ExpandUint64x4", argLength: 2, commutative: false},
 	{name: "ExpandUint64x8", argLength: 2, commutative: false},
-	{name: "ExtendLo2ToInt64x2Int8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo2ToInt64x2Int16x8", argLength: 1, commutative: false},
-	{name: "ExtendLo2ToInt64x2Int32x4", argLength: 1, commutative: false},
-	{name: "ExtendLo2ToUint64x2Uint8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo2ToUint64x2Uint16x8", argLength: 1, commutative: false},
-	{name: "ExtendLo2ToUint64x2Uint32x4", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToInt32x4Int8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToInt32x4Int16x8", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToInt64x4Int8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToInt64x4Int16x8", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToUint32x4Uint8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToUint32x4Uint16x8", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToUint64x4Uint8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo4ToUint64x4Uint16x8", argLength: 1, commutative: false},
-	{name: "ExtendLo8ToInt16x8Int8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo8ToInt32x8Int8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo8ToInt64x8Int8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo8ToUint16x8Uint8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo8ToUint32x8Uint8x16", argLength: 1, commutative: false},
-	{name: "ExtendLo8ToUint64x8Uint8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo2ToInt64Int8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo2ToInt64Int16x8", argLength: 1, commutative: false},
+	{name: "ExtendLo2ToInt64Int32x4", argLength: 1, commutative: false},
+	{name: "ExtendLo2ToUint64Uint8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo2ToUint64Uint16x8", argLength: 1, commutative: false},
+	{name: "ExtendLo2ToUint64Uint32x4", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToInt32Int8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToInt32Int16x8", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToInt64Int8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToInt64Int16x8", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToUint32Uint8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToUint32Uint16x8", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToUint64Uint8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo4ToUint64Uint16x8", argLength: 1, commutative: false},
+	{name: "ExtendLo8ToInt16Int8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo8ToInt32Int8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo8ToInt64Int8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo8ToUint16Uint8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo8ToUint32Uint8x16", argLength: 1, commutative: false},
+	{name: "ExtendLo8ToUint64Uint8x16", argLength: 1, commutative: false},
 	{name: "ExtendToInt16Int8x16", argLength: 1, commutative: false},
 	{name: "ExtendToInt16Int8x32", argLength: 1, commutative:
false}, {name: "ExtendToInt32Int8x16", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index f318adfd2f..d2ba15f740 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -6518,26 +6518,26 @@ const ( OpExpandUint64x2 OpExpandUint64x4 OpExpandUint64x8 - OpExtendLo2ToInt64x2Int8x16 - OpExtendLo2ToInt64x2Int16x8 - OpExtendLo2ToInt64x2Int32x4 - OpExtendLo2ToUint64x2Uint8x16 - OpExtendLo2ToUint64x2Uint16x8 - OpExtendLo2ToUint64x2Uint32x4 - OpExtendLo4ToInt32x4Int8x16 - OpExtendLo4ToInt32x4Int16x8 - OpExtendLo4ToInt64x4Int8x16 - OpExtendLo4ToInt64x4Int16x8 - OpExtendLo4ToUint32x4Uint8x16 - OpExtendLo4ToUint32x4Uint16x8 - OpExtendLo4ToUint64x4Uint8x16 - OpExtendLo4ToUint64x4Uint16x8 - OpExtendLo8ToInt16x8Int8x16 - OpExtendLo8ToInt32x8Int8x16 - OpExtendLo8ToInt64x8Int8x16 - OpExtendLo8ToUint16x8Uint8x16 - OpExtendLo8ToUint32x8Uint8x16 - OpExtendLo8ToUint64x8Uint8x16 + OpExtendLo2ToInt64Int8x16 + OpExtendLo2ToInt64Int16x8 + OpExtendLo2ToInt64Int32x4 + OpExtendLo2ToUint64Uint8x16 + OpExtendLo2ToUint64Uint16x8 + OpExtendLo2ToUint64Uint32x4 + OpExtendLo4ToInt32Int8x16 + OpExtendLo4ToInt32Int16x8 + OpExtendLo4ToInt64Int8x16 + OpExtendLo4ToInt64Int16x8 + OpExtendLo4ToUint32Uint8x16 + OpExtendLo4ToUint32Uint16x8 + OpExtendLo4ToUint64Uint8x16 + OpExtendLo4ToUint64Uint16x8 + OpExtendLo8ToInt16Int8x16 + OpExtendLo8ToInt32Int8x16 + OpExtendLo8ToInt64Int8x16 + OpExtendLo8ToUint16Uint8x16 + OpExtendLo8ToUint32Uint8x16 + OpExtendLo8ToUint64Uint8x16 OpExtendToInt16Int8x16 OpExtendToInt16Int8x32 OpExtendToInt32Int8x16 @@ -90896,102 +90896,102 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "ExtendLo2ToInt64x2Int8x16", + name: "ExtendLo2ToInt64Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo2ToInt64x2Int16x8", + name: "ExtendLo2ToInt64Int16x8", argLen: 1, generic: true, }, { - name: "ExtendLo2ToInt64x2Int32x4", + name: "ExtendLo2ToInt64Int32x4", argLen: 1, generic: true, }, { - name: "ExtendLo2ToUint64x2Uint8x16", + name: "ExtendLo2ToUint64Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo2ToUint64x2Uint16x8", + name: "ExtendLo2ToUint64Uint16x8", argLen: 1, generic: true, }, { - name: "ExtendLo2ToUint64x2Uint32x4", + name: "ExtendLo2ToUint64Uint32x4", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt32x4Int8x16", + name: "ExtendLo4ToInt32Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt32x4Int16x8", + name: "ExtendLo4ToInt32Int16x8", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt64x4Int8x16", + name: "ExtendLo4ToInt64Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToInt64x4Int16x8", + name: "ExtendLo4ToInt64Int16x8", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint32x4Uint8x16", + name: "ExtendLo4ToUint32Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint32x4Uint16x8", + name: "ExtendLo4ToUint32Uint16x8", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint64x4Uint8x16", + name: "ExtendLo4ToUint64Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo4ToUint64x4Uint16x8", + name: "ExtendLo4ToUint64Uint16x8", argLen: 1, generic: true, }, { - name: "ExtendLo8ToInt16x8Int8x16", + name: "ExtendLo8ToInt16Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToInt32x8Int8x16", + name: "ExtendLo8ToInt32Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToInt64x8Int8x16", + name: "ExtendLo8ToInt64Int8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToUint16x8Uint8x16", + name: 
"ExtendLo8ToUint16Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToUint32x8Uint8x16", + name: "ExtendLo8ToUint32Uint8x16", argLen: 1, generic: true, }, { - name: "ExtendLo8ToUint64x8Uint8x16", + name: "ExtendLo8ToUint64Uint8x16", argLen: 1, generic: true, }, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 5fed6a8063..0c04410074 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -3274,64 +3274,64 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpExpandUint8x32(v) case OpExpandUint8x64: return rewriteValueAMD64_OpExpandUint8x64(v) - case OpExtendLo2ToInt64x2Int16x8: + case OpExtendLo2ToInt64Int16x8: v.Op = OpAMD64VPMOVSXWQ128 return true - case OpExtendLo2ToInt64x2Int32x4: + case OpExtendLo2ToInt64Int32x4: v.Op = OpAMD64VPMOVSXDQ128 return true - case OpExtendLo2ToInt64x2Int8x16: + case OpExtendLo2ToInt64Int8x16: v.Op = OpAMD64VPMOVSXBQ128 return true - case OpExtendLo2ToUint64x2Uint16x8: + case OpExtendLo2ToUint64Uint16x8: v.Op = OpAMD64VPMOVZXWQ128 return true - case OpExtendLo2ToUint64x2Uint32x4: + case OpExtendLo2ToUint64Uint32x4: v.Op = OpAMD64VPMOVZXDQ128 return true - case OpExtendLo2ToUint64x2Uint8x16: + case OpExtendLo2ToUint64Uint8x16: v.Op = OpAMD64VPMOVZXBQ128 return true - case OpExtendLo4ToInt32x4Int16x8: + case OpExtendLo4ToInt32Int16x8: v.Op = OpAMD64VPMOVSXWD128 return true - case OpExtendLo4ToInt32x4Int8x16: + case OpExtendLo4ToInt32Int8x16: v.Op = OpAMD64VPMOVSXBD128 return true - case OpExtendLo4ToInt64x4Int16x8: + case OpExtendLo4ToInt64Int16x8: v.Op = OpAMD64VPMOVSXWQ256 return true - case OpExtendLo4ToInt64x4Int8x16: + case OpExtendLo4ToInt64Int8x16: v.Op = OpAMD64VPMOVSXBQ256 return true - case OpExtendLo4ToUint32x4Uint16x8: + case OpExtendLo4ToUint32Uint16x8: v.Op = OpAMD64VPMOVZXWD128 return true - case OpExtendLo4ToUint32x4Uint8x16: + case OpExtendLo4ToUint32Uint8x16: v.Op = OpAMD64VPMOVZXBD128 return true - case OpExtendLo4ToUint64x4Uint16x8: + case OpExtendLo4ToUint64Uint16x8: v.Op = OpAMD64VPMOVZXWQ256 return true - case OpExtendLo4ToUint64x4Uint8x16: + case OpExtendLo4ToUint64Uint8x16: v.Op = OpAMD64VPMOVZXBQ256 return true - case OpExtendLo8ToInt16x8Int8x16: + case OpExtendLo8ToInt16Int8x16: v.Op = OpAMD64VPMOVSXBW128 return true - case OpExtendLo8ToInt32x8Int8x16: + case OpExtendLo8ToInt32Int8x16: v.Op = OpAMD64VPMOVSXBD256 return true - case OpExtendLo8ToInt64x8Int8x16: + case OpExtendLo8ToInt64Int8x16: v.Op = OpAMD64VPMOVSXBQ512 return true - case OpExtendLo8ToUint16x8Uint8x16: + case OpExtendLo8ToUint16Uint8x16: v.Op = OpAMD64VPMOVZXBW128 return true - case OpExtendLo8ToUint32x8Uint8x16: + case OpExtendLo8ToUint32Uint8x16: v.Op = OpAMD64VPMOVZXBD256 return true - case OpExtendLo8ToUint64x8Uint8x16: + case OpExtendLo8ToUint64Uint8x16: v.Op = OpAMD64VPMOVZXBQ512 return true case OpExtendToInt16Int8x16: diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 8b9fe3afdc..5a95761228 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -388,26 +388,26 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x4.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x4.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x8.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int16x8.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x8.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint16x8.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint16x8, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo8ToInt16x8", opLen1(ssa.OpExtendLo8ToInt16x8Int8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo8ToInt32x8", opLen1(ssa.OpExtendLo8ToInt32x8Int8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Int8x16.ExtendLo8ToInt64x8", opLen1(ssa.OpExtendLo8ToInt64x8Int8x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo8ToUint16x8", opLen1(ssa.OpExtendLo8ToUint16x8Uint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo8ToUint32x8", opLen1(ssa.OpExtendLo8ToUint32x8Uint8x16, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.ExtendLo8ToUint64x8", opLen1(ssa.OpExtendLo8ToUint64x8Uint8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x4.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x4.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int8x16, types.TypeVec128), sys.AMD64) + 
addF(simdPackage, "Int16x8.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x8.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint16x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt16", opLen1(ssa.OpExtendLo8ToInt16Int8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt32", opLen1(ssa.OpExtendLo8ToInt32Int8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x16.ExtendLo8ToInt64", opLen1(ssa.OpExtendLo8ToInt64Int8x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint16", opLen1(ssa.OpExtendLo8ToUint16Uint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint32", opLen1(ssa.OpExtendLo8ToUint32Uint8x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.ExtendLo8ToUint64", opLen1(ssa.OpExtendLo8ToUint64Uint8x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x32.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int8x16, types.TypeVec512), sys.AMD64) diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml index 88e4840920..698e6d9956 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml @@ -135,83 +135,83 @@ documentation: !string |- // NAME zero-extends element values to uint64. # low-part only Int <-> Int conversions -- go: ExtendLo8ToUint16x8 +- go: ExtendLo8ToUint16 commutative: false documentation: !string |- // NAME zero-extends 8 lowest vector element values to uint16. -- go: ExtendLo8ToInt16x8 +- go: ExtendLo8ToInt16 commutative: false documentation: !string |- // NAME sign-extends 8 lowest vector element values to int16. -- go: ExtendLo4ToUint32x4 +- go: ExtendLo4ToUint32 commutative: false documentation: !string |- // NAME zero-extends 4 lowest vector element values to uint32. -- go: ExtendLo4ToInt32x4 +- go: ExtendLo4ToInt32 commutative: false documentation: !string |- // NAME sign-extends 4 lowest vector element values to int32. -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 commutative: false documentation: !string |- // NAME zero-extends 2 lowest vector element values to uint64. -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 commutative: false documentation: !string |- // NAME sign-extends 2 lowest vector element values to int64. -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 commutative: false documentation: !string |- // NAME zero-extends 2 lowest vector element values to uint64. 
-- go: ExtendLo4ToUint64x4 +- go: ExtendLo4ToUint64 commutative: false documentation: !string |- // NAME zero-extends 4 lowest vector element values to uint64. -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 commutative: false documentation: !string |- // NAME sign-extends 2 lowest vector element values to int64. -- go: ExtendLo4ToInt64x4 +- go: ExtendLo4ToInt64 commutative: false documentation: !string |- // NAME sign-extends 4 lowest vector element values to int64. -- go: ExtendLo4ToUint32x4 +- go: ExtendLo4ToUint32 commutative: false documentation: !string |- // NAME zero-extends 4 lowest vector element values to uint32. -- go: ExtendLo8ToUint32x8 +- go: ExtendLo8ToUint32 commutative: false documentation: !string |- // NAME zero-extends 8 lowest vector element values to uint32. -- go: ExtendLo4ToInt32x4 +- go: ExtendLo4ToInt32 commutative: false documentation: !string |- // NAME sign-extends 4 lowest vector element values to int32. -- go: ExtendLo8ToInt32x8 +- go: ExtendLo8ToInt32 commutative: false documentation: !string |- // NAME sign-extends 8 lowest vector element values to int32. -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 commutative: false documentation: !string |- // NAME zero-extends 2 lowest vector element values to uint64. -- go: ExtendLo4ToUint64x4 +- go: ExtendLo4ToUint64 commutative: false documentation: !string |- // NAME zero-extends 4 lowest vector element values to uint64. -- go: ExtendLo8ToUint64x8 +- go: ExtendLo8ToUint64 commutative: false documentation: !string |- // NAME zero-extends 8 lowest vector element values to uint64. -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 commutative: false documentation: !string |- // NAME sign-extends 2 lowest vector element values to int64. -- go: ExtendLo4ToInt64x4 +- go: ExtendLo4ToInt64 commutative: false documentation: !string |- // NAME sign-extends 4 lowest vector element values to int64. -- go: ExtendLo8ToInt64x8 +- go: ExtendLo8ToInt64 commutative: false documentation: !string |- // NAME sign-extends 8 lowest vector element values to int64. diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml index f436be23ef..2f19d12616 100644 --- a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml +++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml @@ -491,7 +491,7 @@ # low-part only conversions. 
# uint8->uint16 -- go: ExtendLo8ToUint16x8 +- go: ExtendLo8ToUint16 regexpTag: "convert" asm: "VPMOVZXBW" in: @@ -499,7 +499,7 @@ out: - *u16x8 # int8->int16 -- go: ExtendLo8ToInt16x8 +- go: ExtendLo8ToInt16 regexpTag: "convert" asm: "VPMOVSXBW" in: @@ -507,7 +507,7 @@ out: - *i16x8 # uint16->uint32 -- go: ExtendLo4ToUint32x4 +- go: ExtendLo4ToUint32 regexpTag: "convert" asm: "VPMOVZXWD" in: @@ -515,7 +515,7 @@ out: - *u32x4 # int16->int32 -- go: ExtendLo4ToInt32x4 +- go: ExtendLo4ToInt32 regexpTag: "convert" asm: "VPMOVSXWD" in: @@ -523,7 +523,7 @@ out: - *i32x4 # uint32 -> uint64 -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 regexpTag: "convert" asm: "VPMOVZXDQ" in: @@ -534,7 +534,7 @@ elemBits: 64 bits: 128 # int32 -> int64 -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 regexpTag: "convert" asm: "VPMOVSXDQ" in: @@ -545,14 +545,14 @@ elemBits: 64 bits: 128 # uint16 -> uint64 -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 regexpTag: "convert" asm: "VPMOVZXWQ" in: - *u16x8 out: - *u64x2 -- go: ExtendLo4ToUint64x4 +- go: ExtendLo4ToUint64 regexpTag: "convert" asm: "VPMOVZXWQ" in: @@ -560,14 +560,14 @@ out: - *u64x4 # int16 -> int64 -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 regexpTag: "convert" asm: "VPMOVSXWQ" in: - *i16x8 out: - *i64x2 -- go: ExtendLo4ToInt64x4 +- go: ExtendLo4ToInt64 regexpTag: "convert" asm: "VPMOVSXWQ" in: @@ -575,14 +575,14 @@ out: - *i64x4 # uint8 -> uint32 -- go: ExtendLo4ToUint32x4 +- go: ExtendLo4ToUint32 regexpTag: "convert" asm: "VPMOVZXBD" in: - *u8x16 out: - *u32x4 -- go: ExtendLo8ToUint32x8 +- go: ExtendLo8ToUint32 regexpTag: "convert" asm: "VPMOVZXBD" in: @@ -590,14 +590,14 @@ out: - *u32x8 # int8 -> int32 -- go: ExtendLo4ToInt32x4 +- go: ExtendLo4ToInt32 regexpTag: "convert" asm: "VPMOVSXBD" in: - *i8x16 out: - *i32x4 -- go: ExtendLo8ToInt32x8 +- go: ExtendLo8ToInt32 regexpTag: "convert" asm: "VPMOVSXBD" in: @@ -605,21 +605,21 @@ out: - *i32x8 # uint8 -> uint64 -- go: ExtendLo2ToUint64x2 +- go: ExtendLo2ToUint64 regexpTag: "convert" asm: "VPMOVZXBQ" in: - *u8x16 out: - *u64x2 -- go: ExtendLo4ToUint64x4 +- go: ExtendLo4ToUint64 regexpTag: "convert" asm: "VPMOVZXBQ" in: - *u8x16 out: - *u64x4 -- go: ExtendLo8ToUint64x8 +- go: ExtendLo8ToUint64 regexpTag: "convert" asm: "VPMOVZXBQ" in: @@ -627,21 +627,21 @@ out: - *u64x8 # int8 -> int64 -- go: ExtendLo2ToInt64x2 +- go: ExtendLo2ToInt64 regexpTag: "convert" asm: "VPMOVSXBQ" in: - *i8x16 out: - *i64x2 -- go: ExtendLo4ToInt64x4 +- go: ExtendLo4ToInt64 regexpTag: "convert" asm: "VPMOVSXBQ" in: - *i8x16 out: - *i64x4 -- go: ExtendLo8ToInt64x8 +- go: ExtendLo8ToInt64 regexpTag: "convert" asm: "VPMOVSXBQ" in: diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index 20ae3b1cc2..522a98caea 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -2325,129 +2325,129 @@ func (x Uint64x4) Expand(mask Mask64x4) Uint64x4 // Asm: VPEXPANDQ, CPU Feature: AVX512 func (x Uint64x8) Expand(mask Mask64x8) Uint64x8 -/* ExtendLo2ToInt64x2 */ +/* ExtendLo2ToInt64 */ -// ExtendLo2ToInt64x2 sign-extends 2 lowest vector element values to int64. +// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64. // // Asm: VPMOVSXBQ, CPU Feature: AVX -func (x Int8x16) ExtendLo2ToInt64x2() Int64x2 +func (x Int8x16) ExtendLo2ToInt64() Int64x2 -// ExtendLo2ToInt64x2 sign-extends 2 lowest vector element values to int64. +// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64. 
// // Asm: VPMOVSXWQ, CPU Feature: AVX -func (x Int16x8) ExtendLo2ToInt64x2() Int64x2 +func (x Int16x8) ExtendLo2ToInt64() Int64x2 -// ExtendLo2ToInt64x2 sign-extends 2 lowest vector element values to int64. +// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64. // // Asm: VPMOVSXDQ, CPU Feature: AVX -func (x Int32x4) ExtendLo2ToInt64x2() Int64x2 +func (x Int32x4) ExtendLo2ToInt64() Int64x2 -/* ExtendLo2ToUint64x2 */ +/* ExtendLo2ToUint64 */ -// ExtendLo2ToUint64x2 zero-extends 2 lowest vector element values to uint64. +// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64. // // Asm: VPMOVZXBQ, CPU Feature: AVX -func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2 +func (x Uint8x16) ExtendLo2ToUint64() Uint64x2 -// ExtendLo2ToUint64x2 zero-extends 2 lowest vector element values to uint64. +// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64. // // Asm: VPMOVZXWQ, CPU Feature: AVX -func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2 +func (x Uint16x8) ExtendLo2ToUint64() Uint64x2 -// ExtendLo2ToUint64x2 zero-extends 2 lowest vector element values to uint64. +// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64. // // Asm: VPMOVZXDQ, CPU Feature: AVX -func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2 +func (x Uint32x4) ExtendLo2ToUint64() Uint64x2 -/* ExtendLo4ToInt32x4 */ +/* ExtendLo4ToInt32 */ -// ExtendLo4ToInt32x4 sign-extends 4 lowest vector element values to int32. +// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32. // // Asm: VPMOVSXBD, CPU Feature: AVX -func (x Int8x16) ExtendLo4ToInt32x4() Int32x4 +func (x Int8x16) ExtendLo4ToInt32() Int32x4 -// ExtendLo4ToInt32x4 sign-extends 4 lowest vector element values to int32. +// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32. // // Asm: VPMOVSXWD, CPU Feature: AVX -func (x Int16x8) ExtendLo4ToInt32x4() Int32x4 +func (x Int16x8) ExtendLo4ToInt32() Int32x4 -/* ExtendLo4ToInt64x4 */ +/* ExtendLo4ToInt64 */ -// ExtendLo4ToInt64x4 sign-extends 4 lowest vector element values to int64. +// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64. // // Asm: VPMOVSXBQ, CPU Feature: AVX2 -func (x Int8x16) ExtendLo4ToInt64x4() Int64x4 +func (x Int8x16) ExtendLo4ToInt64() Int64x4 -// ExtendLo4ToInt64x4 sign-extends 4 lowest vector element values to int64. +// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64. // // Asm: VPMOVSXWQ, CPU Feature: AVX2 -func (x Int16x8) ExtendLo4ToInt64x4() Int64x4 +func (x Int16x8) ExtendLo4ToInt64() Int64x4 -/* ExtendLo4ToUint32x4 */ +/* ExtendLo4ToUint32 */ -// ExtendLo4ToUint32x4 zero-extends 4 lowest vector element values to uint32. +// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32. // // Asm: VPMOVZXBD, CPU Feature: AVX -func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4 +func (x Uint8x16) ExtendLo4ToUint32() Uint32x4 -// ExtendLo4ToUint32x4 zero-extends 4 lowest vector element values to uint32. +// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32. // // Asm: VPMOVZXWD, CPU Feature: AVX -func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4 +func (x Uint16x8) ExtendLo4ToUint32() Uint32x4 -/* ExtendLo4ToUint64x4 */ +/* ExtendLo4ToUint64 */ -// ExtendLo4ToUint64x4 zero-extends 4 lowest vector element values to uint64. +// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64. 
// // Asm: VPMOVZXBQ, CPU Feature: AVX2 -func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4 +func (x Uint8x16) ExtendLo4ToUint64() Uint64x4 -// ExtendLo4ToUint64x4 zero-extends 4 lowest vector element values to uint64. +// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64. // // Asm: VPMOVZXWQ, CPU Feature: AVX2 -func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4 +func (x Uint16x8) ExtendLo4ToUint64() Uint64x4 -/* ExtendLo8ToInt16x8 */ +/* ExtendLo8ToInt16 */ -// ExtendLo8ToInt16x8 sign-extends 8 lowest vector element values to int16. +// ExtendLo8ToInt16 sign-extends 8 lowest vector element values to int16. // // Asm: VPMOVSXBW, CPU Feature: AVX -func (x Int8x16) ExtendLo8ToInt16x8() Int16x8 +func (x Int8x16) ExtendLo8ToInt16() Int16x8 -/* ExtendLo8ToInt32x8 */ +/* ExtendLo8ToInt32 */ -// ExtendLo8ToInt32x8 sign-extends 8 lowest vector element values to int32. +// ExtendLo8ToInt32 sign-extends 8 lowest vector element values to int32. // // Asm: VPMOVSXBD, CPU Feature: AVX2 -func (x Int8x16) ExtendLo8ToInt32x8() Int32x8 +func (x Int8x16) ExtendLo8ToInt32() Int32x8 -/* ExtendLo8ToInt64x8 */ +/* ExtendLo8ToInt64 */ -// ExtendLo8ToInt64x8 sign-extends 8 lowest vector element values to int64. +// ExtendLo8ToInt64 sign-extends 8 lowest vector element values to int64. // // Asm: VPMOVSXBQ, CPU Feature: AVX512 -func (x Int8x16) ExtendLo8ToInt64x8() Int64x8 +func (x Int8x16) ExtendLo8ToInt64() Int64x8 -/* ExtendLo8ToUint16x8 */ +/* ExtendLo8ToUint16 */ -// ExtendLo8ToUint16x8 zero-extends 8 lowest vector element values to uint16. +// ExtendLo8ToUint16 zero-extends 8 lowest vector element values to uint16. // // Asm: VPMOVZXBW, CPU Feature: AVX -func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8 +func (x Uint8x16) ExtendLo8ToUint16() Uint16x8 -/* ExtendLo8ToUint32x8 */ +/* ExtendLo8ToUint32 */ -// ExtendLo8ToUint32x8 zero-extends 8 lowest vector element values to uint32. +// ExtendLo8ToUint32 zero-extends 8 lowest vector element values to uint32. // // Asm: VPMOVZXBD, CPU Feature: AVX2 -func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8 +func (x Uint8x16) ExtendLo8ToUint32() Uint32x8 -/* ExtendLo8ToUint64x8 */ +/* ExtendLo8ToUint64 */ -// ExtendLo8ToUint64x8 zero-extends 8 lowest vector element values to uint64. +// ExtendLo8ToUint64 zero-extends 8 lowest vector element values to uint64. // // Asm: VPMOVZXBQ, CPU Feature: AVX512 -func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8 +func (x Uint8x16) ExtendLo8ToUint64() Uint64x8 /* ExtendToInt16 */