]> Cypherpunks repositories - gostls13.git/commitdiff
simd/archsimd: rename Broadcast methods
author: Cherry Mui <cherryyz@google.com>
Thu, 8 Jan 2026 16:57:28 +0000 (11:57 -0500)
committer: Junyang Shao <shaojunyang@google.com>
Thu, 8 Jan 2026 17:44:00 +0000 (09:44 -0800)
Currently the Broadcast128/256/512 methods broadcast the lowest
element of the input vector to a vector of the corresponding width.
There are also variations of broadcast operations that broadcast
the whole (128- or 256-bit) vector to a larger vector, which we
don't yet support. Our current naming does not make clear which
version is meant, though. Rename the current ones to Broadcast1ToN, to be clear
that they broadcast one element. The vector version probably will
be named BroadcastAllToN (not included in this CL).

Change-Id: I47a21e367f948ec0b578d63706a40d20f5a9f46d
Reviewed-on: https://go-review.googlesource.com/c/go/+/734840
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/archsimd/_gen/simdgen/ops/Moves/categories.yaml
src/simd/archsimd/_gen/simdgen/ops/Moves/go.yaml
src/simd/archsimd/_gen/tmplgen/main.go
src/simd/archsimd/ops_amd64.go
src/simd/archsimd/other_gen_amd64.go

index c4d0fd69c6ba755acba0b780566780864edf0b99..a028cbe86d5639a13b81632f8bb75c8fc7f4f716 100644 (file)
@@ -25,23 +25,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPABSQ128,
                ssa.OpAMD64VPABSQ256,
                ssa.OpAMD64VPABSQ512,
-               ssa.OpAMD64VBROADCASTSS128,
                ssa.OpAMD64VPBROADCASTQ128,
-               ssa.OpAMD64VPBROADCASTB128,
-               ssa.OpAMD64VPBROADCASTW128,
+               ssa.OpAMD64VBROADCASTSS128,
+               ssa.OpAMD64VBROADCASTSD256,
                ssa.OpAMD64VPBROADCASTD128,
+               ssa.OpAMD64VPBROADCASTQ256,
                ssa.OpAMD64VBROADCASTSS256,
-               ssa.OpAMD64VBROADCASTSD256,
-               ssa.OpAMD64VPBROADCASTB256,
-               ssa.OpAMD64VPBROADCASTW256,
+               ssa.OpAMD64VBROADCASTSD512,
+               ssa.OpAMD64VPBROADCASTW128,
                ssa.OpAMD64VPBROADCASTD256,
-               ssa.OpAMD64VPBROADCASTQ256,
+               ssa.OpAMD64VPBROADCASTQ512,
                ssa.OpAMD64VBROADCASTSS512,
-               ssa.OpAMD64VBROADCASTSD512,
-               ssa.OpAMD64VPBROADCASTB512,
-               ssa.OpAMD64VPBROADCASTW512,
+               ssa.OpAMD64VPBROADCASTB128,
+               ssa.OpAMD64VPBROADCASTW256,
                ssa.OpAMD64VPBROADCASTD512,
-               ssa.OpAMD64VPBROADCASTQ512,
+               ssa.OpAMD64VPBROADCASTB256,
+               ssa.OpAMD64VPBROADCASTW512,
+               ssa.OpAMD64VPBROADCASTB512,
                ssa.OpAMD64VCVTPD2PSX128,
                ssa.OpAMD64VCVTPD2PSY128,
                ssa.OpAMD64VCVTPD2PS256,
@@ -832,23 +832,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPABSQMasked128,
                ssa.OpAMD64VPABSQMasked256,
                ssa.OpAMD64VPABSQMasked512,
-               ssa.OpAMD64VBROADCASTSSMasked128,
                ssa.OpAMD64VPBROADCASTQMasked128,
-               ssa.OpAMD64VPBROADCASTBMasked128,
-               ssa.OpAMD64VPBROADCASTWMasked128,
+               ssa.OpAMD64VBROADCASTSSMasked128,
+               ssa.OpAMD64VBROADCASTSDMasked256,
                ssa.OpAMD64VPBROADCASTDMasked128,
+               ssa.OpAMD64VPBROADCASTQMasked256,
                ssa.OpAMD64VBROADCASTSSMasked256,
-               ssa.OpAMD64VBROADCASTSDMasked256,
-               ssa.OpAMD64VPBROADCASTBMasked256,
-               ssa.OpAMD64VPBROADCASTWMasked256,
+               ssa.OpAMD64VBROADCASTSDMasked512,
+               ssa.OpAMD64VPBROADCASTWMasked128,
                ssa.OpAMD64VPBROADCASTDMasked256,
-               ssa.OpAMD64VPBROADCASTQMasked256,
+               ssa.OpAMD64VPBROADCASTQMasked512,
                ssa.OpAMD64VBROADCASTSSMasked512,
-               ssa.OpAMD64VBROADCASTSDMasked512,
-               ssa.OpAMD64VPBROADCASTBMasked512,
-               ssa.OpAMD64VPBROADCASTWMasked512,
+               ssa.OpAMD64VPBROADCASTBMasked128,
+               ssa.OpAMD64VPBROADCASTWMasked256,
                ssa.OpAMD64VPBROADCASTDMasked512,
-               ssa.OpAMD64VPBROADCASTQMasked512,
+               ssa.OpAMD64VPBROADCASTBMasked256,
+               ssa.OpAMD64VPBROADCASTWMasked512,
+               ssa.OpAMD64VPBROADCASTBMasked512,
                ssa.OpAMD64VCOMPRESSPSMasked128,
                ssa.OpAMD64VCOMPRESSPSMasked256,
                ssa.OpAMD64VCOMPRESSPSMasked512,
@@ -2460,23 +2460,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPABSQMasked128Merging,
                ssa.OpAMD64VPABSQMasked256Merging,
                ssa.OpAMD64VPABSQMasked512Merging,
-               ssa.OpAMD64VBROADCASTSSMasked128Merging,
                ssa.OpAMD64VPBROADCASTQMasked128Merging,
-               ssa.OpAMD64VPBROADCASTBMasked128Merging,
-               ssa.OpAMD64VPBROADCASTWMasked128Merging,
+               ssa.OpAMD64VBROADCASTSSMasked128Merging,
+               ssa.OpAMD64VBROADCASTSDMasked256Merging,
                ssa.OpAMD64VPBROADCASTDMasked128Merging,
+               ssa.OpAMD64VPBROADCASTQMasked256Merging,
                ssa.OpAMD64VBROADCASTSSMasked256Merging,
-               ssa.OpAMD64VBROADCASTSDMasked256Merging,
-               ssa.OpAMD64VPBROADCASTBMasked256Merging,
-               ssa.OpAMD64VPBROADCASTWMasked256Merging,
+               ssa.OpAMD64VBROADCASTSDMasked512Merging,
+               ssa.OpAMD64VPBROADCASTWMasked128Merging,
                ssa.OpAMD64VPBROADCASTDMasked256Merging,
-               ssa.OpAMD64VPBROADCASTQMasked256Merging,
+               ssa.OpAMD64VPBROADCASTQMasked512Merging,
                ssa.OpAMD64VBROADCASTSSMasked512Merging,
-               ssa.OpAMD64VBROADCASTSDMasked512Merging,
-               ssa.OpAMD64VPBROADCASTBMasked512Merging,
-               ssa.OpAMD64VPBROADCASTWMasked512Merging,
+               ssa.OpAMD64VPBROADCASTBMasked128Merging,
+               ssa.OpAMD64VPBROADCASTWMasked256Merging,
                ssa.OpAMD64VPBROADCASTDMasked512Merging,
-               ssa.OpAMD64VPBROADCASTQMasked512Merging,
+               ssa.OpAMD64VPBROADCASTBMasked256Merging,
+               ssa.OpAMD64VPBROADCASTWMasked512Merging,
+               ssa.OpAMD64VPBROADCASTBMasked512Merging,
                ssa.OpAMD64VRNDSCALEPSMasked128Merging,
                ssa.OpAMD64VRNDSCALEPSMasked256Merging,
                ssa.OpAMD64VRNDSCALEPSMasked512Merging,
@@ -2817,23 +2817,23 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPAVGWMasked128,
                ssa.OpAMD64VPAVGWMasked256,
                ssa.OpAMD64VPAVGWMasked512,
-               ssa.OpAMD64VBROADCASTSSMasked128,
                ssa.OpAMD64VPBROADCASTQMasked128,
-               ssa.OpAMD64VPBROADCASTBMasked128,
-               ssa.OpAMD64VPBROADCASTWMasked128,
+               ssa.OpAMD64VBROADCASTSSMasked128,
+               ssa.OpAMD64VBROADCASTSDMasked256,
                ssa.OpAMD64VPBROADCASTDMasked128,
+               ssa.OpAMD64VPBROADCASTQMasked256,
                ssa.OpAMD64VBROADCASTSSMasked256,
-               ssa.OpAMD64VBROADCASTSDMasked256,
-               ssa.OpAMD64VPBROADCASTBMasked256,
-               ssa.OpAMD64VPBROADCASTWMasked256,
+               ssa.OpAMD64VBROADCASTSDMasked512,
+               ssa.OpAMD64VPBROADCASTWMasked128,
                ssa.OpAMD64VPBROADCASTDMasked256,
-               ssa.OpAMD64VPBROADCASTQMasked256,
+               ssa.OpAMD64VPBROADCASTQMasked512,
                ssa.OpAMD64VBROADCASTSSMasked512,
-               ssa.OpAMD64VBROADCASTSDMasked512,
-               ssa.OpAMD64VPBROADCASTBMasked512,
-               ssa.OpAMD64VPBROADCASTWMasked512,
+               ssa.OpAMD64VPBROADCASTBMasked128,
+               ssa.OpAMD64VPBROADCASTWMasked256,
                ssa.OpAMD64VPBROADCASTDMasked512,
-               ssa.OpAMD64VPBROADCASTQMasked512,
+               ssa.OpAMD64VPBROADCASTBMasked256,
+               ssa.OpAMD64VPBROADCASTWMasked512,
+               ssa.OpAMD64VPBROADCASTBMasked512,
                ssa.OpAMD64VRNDSCALEPSMasked128,
                ssa.OpAMD64VRNDSCALEPSMasked128load,
                ssa.OpAMD64VRNDSCALEPSMasked256,
index 5c83f39a1fec33e41ee382a919ecd54a20f94733..799461610d320d86b0c69edef65d9b8c94114c19 100644 (file)
 (AverageUint16x8 ...) => (VPAVGW128 ...)
 (AverageUint16x16 ...) => (VPAVGW256 ...)
 (AverageUint16x32 ...) => (VPAVGW512 ...)
-(Broadcast128Float32x4 ...) => (VBROADCASTSS128 ...)
-(Broadcast128Float64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128Int8x16 ...) => (VPBROADCASTB128 ...)
-(Broadcast128Int16x8 ...) => (VPBROADCASTW128 ...)
-(Broadcast128Int32x4 ...) => (VPBROADCASTD128 ...)
-(Broadcast128Int64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast128Uint8x16 ...) => (VPBROADCASTB128 ...)
-(Broadcast128Uint16x8 ...) => (VPBROADCASTW128 ...)
-(Broadcast128Uint32x4 ...) => (VPBROADCASTD128 ...)
-(Broadcast128Uint64x2 ...) => (VPBROADCASTQ128 ...)
-(Broadcast256Float32x4 ...) => (VBROADCASTSS256 ...)
-(Broadcast256Float64x2 ...) => (VBROADCASTSD256 ...)
-(Broadcast256Int8x16 ...) => (VPBROADCASTB256 ...)
-(Broadcast256Int16x8 ...) => (VPBROADCASTW256 ...)
-(Broadcast256Int32x4 ...) => (VPBROADCASTD256 ...)
-(Broadcast256Int64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast256Uint8x16 ...) => (VPBROADCASTB256 ...)
-(Broadcast256Uint16x8 ...) => (VPBROADCASTW256 ...)
-(Broadcast256Uint32x4 ...) => (VPBROADCASTD256 ...)
-(Broadcast256Uint64x2 ...) => (VPBROADCASTQ256 ...)
-(Broadcast512Float32x4 ...) => (VBROADCASTSS512 ...)
-(Broadcast512Float64x2 ...) => (VBROADCASTSD512 ...)
-(Broadcast512Int8x16 ...) => (VPBROADCASTB512 ...)
-(Broadcast512Int16x8 ...) => (VPBROADCASTW512 ...)
-(Broadcast512Int32x4 ...) => (VPBROADCASTD512 ...)
-(Broadcast512Int64x2 ...) => (VPBROADCASTQ512 ...)
-(Broadcast512Uint8x16 ...) => (VPBROADCASTB512 ...)
-(Broadcast512Uint16x8 ...) => (VPBROADCASTW512 ...)
-(Broadcast512Uint32x4 ...) => (VPBROADCASTD512 ...)
-(Broadcast512Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To2Float64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To2Int64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To2Uint64x2 ...) => (VPBROADCASTQ128 ...)
+(Broadcast1To4Float32x4 ...) => (VBROADCASTSS128 ...)
+(Broadcast1To4Float64x2 ...) => (VBROADCASTSD256 ...)
+(Broadcast1To4Int32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast1To4Int64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast1To4Uint32x4 ...) => (VPBROADCASTD128 ...)
+(Broadcast1To4Uint64x2 ...) => (VPBROADCASTQ256 ...)
+(Broadcast1To8Float32x4 ...) => (VBROADCASTSS256 ...)
+(Broadcast1To8Float64x2 ...) => (VBROADCASTSD512 ...)
+(Broadcast1To8Int16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast1To8Int32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast1To8Int64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To8Uint16x8 ...) => (VPBROADCASTW128 ...)
+(Broadcast1To8Uint32x4 ...) => (VPBROADCASTD256 ...)
+(Broadcast1To8Uint64x2 ...) => (VPBROADCASTQ512 ...)
+(Broadcast1To16Float32x4 ...) => (VBROADCASTSS512 ...)
+(Broadcast1To16Int8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast1To16Int16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast1To16Int32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast1To16Uint8x16 ...) => (VPBROADCASTB128 ...)
+(Broadcast1To16Uint16x8 ...) => (VPBROADCASTW256 ...)
+(Broadcast1To16Uint32x4 ...) => (VPBROADCASTD512 ...)
+(Broadcast1To32Int8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast1To32Int16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast1To32Uint8x16 ...) => (VPBROADCASTB256 ...)
+(Broadcast1To32Uint16x8 ...) => (VPBROADCASTW512 ...)
+(Broadcast1To64Int8x16 ...) => (VPBROADCASTB512 ...)
+(Broadcast1To64Uint8x16 ...) => (VPBROADCASTB512 ...)
 (CeilFloat32x4 x) => (VROUNDPS128 [2] x)
 (CeilFloat32x8 x) => (VROUNDPS256 [2] x)
 (CeilFloat64x2 x) => (VROUNDPD128 [2] x)
 (VMOVDQU16Masked128 (VPAVGW128 x y) mask) => (VPAVGWMasked128 x y mask)
 (VMOVDQU16Masked256 (VPAVGW256 x y) mask) => (VPAVGWMasked256 x y mask)
 (VMOVDQU16Masked512 (VPAVGW512 x y) mask) => (VPAVGWMasked512 x y mask)
-(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
 (VMOVDQU64Masked128 (VPBROADCASTQ128 x) mask) => (VPBROADCASTQMasked128 x mask)
-(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
-(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
+(VMOVDQU32Masked128 (VBROADCASTSS128 x) mask) => (VBROADCASTSSMasked128 x mask)
+(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
 (VMOVDQU32Masked128 (VPBROADCASTD128 x) mask) => (VPBROADCASTDMasked128 x mask)
+(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
 (VMOVDQU32Masked256 (VBROADCASTSS256 x) mask) => (VBROADCASTSSMasked256 x mask)
-(VMOVDQU64Masked256 (VBROADCASTSD256 x) mask) => (VBROADCASTSDMasked256 x mask)
-(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
-(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
+(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
+(VMOVDQU16Masked128 (VPBROADCASTW128 x) mask) => (VPBROADCASTWMasked128 x mask)
 (VMOVDQU32Masked256 (VPBROADCASTD256 x) mask) => (VPBROADCASTDMasked256 x mask)
-(VMOVDQU64Masked256 (VPBROADCASTQ256 x) mask) => (VPBROADCASTQMasked256 x mask)
+(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
 (VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) => (VBROADCASTSSMasked512 x mask)
-(VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) => (VBROADCASTSDMasked512 x mask)
-(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
-(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
+(VMOVDQU8Masked128 (VPBROADCASTB128 x) mask) => (VPBROADCASTBMasked128 x mask)
+(VMOVDQU16Masked256 (VPBROADCASTW256 x) mask) => (VPBROADCASTWMasked256 x mask)
 (VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) => (VPBROADCASTDMasked512 x mask)
-(VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) => (VPBROADCASTQMasked512 x mask)
+(VMOVDQU8Masked256 (VPBROADCASTB256 x) mask) => (VPBROADCASTBMasked256 x mask)
+(VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) => (VPBROADCASTWMasked512 x mask)
+(VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) => (VPBROADCASTBMasked512 x mask)
 (VMOVDQU32Masked128 (VRNDSCALEPS128 [a] x) mask) => (VRNDSCALEPSMasked128 [a] x mask)
 (VMOVDQU32Masked256 (VRNDSCALEPS256 [a] x) mask) => (VRNDSCALEPSMasked256 [a] x mask)
 (VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) => (VRNDSCALEPSMasked512 [a] x mask)
index 889ab0d84ffa22b0da8174048ed7109bb19cd6b1..ff863a389f2bc796149e33a455be290df1eb0078 100644 (file)
@@ -143,36 +143,36 @@ func simdGenericOps() []opData {
                {name: "AverageUint16x8", argLength: 2, commutative: true},
                {name: "AverageUint16x16", argLength: 2, commutative: true},
                {name: "AverageUint16x32", argLength: 2, commutative: true},
-               {name: "Broadcast128Float32x4", argLength: 1, commutative: false},
-               {name: "Broadcast128Float64x2", argLength: 1, commutative: false},
-               {name: "Broadcast128Int8x16", argLength: 1, commutative: false},
-               {name: "Broadcast128Int16x8", argLength: 1, commutative: false},
-               {name: "Broadcast128Int32x4", argLength: 1, commutative: false},
-               {name: "Broadcast128Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast128Uint8x16", argLength: 1, commutative: false},
-               {name: "Broadcast128Uint16x8", argLength: 1, commutative: false},
-               {name: "Broadcast128Uint32x4", argLength: 1, commutative: false},
-               {name: "Broadcast128Uint64x2", argLength: 1, commutative: false},
-               {name: "Broadcast256Float32x4", argLength: 1, commutative: false},
-               {name: "Broadcast256Float64x2", argLength: 1, commutative: false},
-               {name: "Broadcast256Int8x16", argLength: 1, commutative: false},
-               {name: "Broadcast256Int16x8", argLength: 1, commutative: false},
-               {name: "Broadcast256Int32x4", argLength: 1, commutative: false},
-               {name: "Broadcast256Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast256Uint8x16", argLength: 1, commutative: false},
-               {name: "Broadcast256Uint16x8", argLength: 1, commutative: false},
-               {name: "Broadcast256Uint32x4", argLength: 1, commutative: false},
-               {name: "Broadcast256Uint64x2", argLength: 1, commutative: false},
-               {name: "Broadcast512Float32x4", argLength: 1, commutative: false},
-               {name: "Broadcast512Float64x2", argLength: 1, commutative: false},
-               {name: "Broadcast512Int8x16", argLength: 1, commutative: false},
-               {name: "Broadcast512Int16x8", argLength: 1, commutative: false},
-               {name: "Broadcast512Int32x4", argLength: 1, commutative: false},
-               {name: "Broadcast512Int64x2", argLength: 1, commutative: false},
-               {name: "Broadcast512Uint8x16", argLength: 1, commutative: false},
-               {name: "Broadcast512Uint16x8", argLength: 1, commutative: false},
-               {name: "Broadcast512Uint32x4", argLength: 1, commutative: false},
-               {name: "Broadcast512Uint64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To2Float64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To2Int64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To2Uint64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To4Float32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To4Float64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To4Int32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To4Int64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To4Uint32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To4Uint64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Float32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Float64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Int16x8", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Int32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Int64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Uint16x8", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Uint32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To8Uint64x2", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Float32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Int8x16", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Int16x8", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Int32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Uint8x16", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Uint16x8", argLength: 1, commutative: false},
+               {name: "Broadcast1To16Uint32x4", argLength: 1, commutative: false},
+               {name: "Broadcast1To32Int8x16", argLength: 1, commutative: false},
+               {name: "Broadcast1To32Int16x8", argLength: 1, commutative: false},
+               {name: "Broadcast1To32Uint8x16", argLength: 1, commutative: false},
+               {name: "Broadcast1To32Uint16x8", argLength: 1, commutative: false},
+               {name: "Broadcast1To64Int8x16", argLength: 1, commutative: false},
+               {name: "Broadcast1To64Uint8x16", argLength: 1, commutative: false},
                {name: "CeilFloat32x4", argLength: 1, commutative: false},
                {name: "CeilFloat32x8", argLength: 1, commutative: false},
                {name: "CeilFloat64x2", argLength: 1, commutative: false},
index 7b70dc26863f0b5efec425ff785197b298829c27..9e5fdb1fc12e0242169bcece24e9997fbfeb981a 100644 (file)
@@ -6309,36 +6309,36 @@ const (
        OpAverageUint16x8
        OpAverageUint16x16
        OpAverageUint16x32
-       OpBroadcast128Float32x4
-       OpBroadcast128Float64x2
-       OpBroadcast128Int8x16
-       OpBroadcast128Int16x8
-       OpBroadcast128Int32x4
-       OpBroadcast128Int64x2
-       OpBroadcast128Uint8x16
-       OpBroadcast128Uint16x8
-       OpBroadcast128Uint32x4
-       OpBroadcast128Uint64x2
-       OpBroadcast256Float32x4
-       OpBroadcast256Float64x2
-       OpBroadcast256Int8x16
-       OpBroadcast256Int16x8
-       OpBroadcast256Int32x4
-       OpBroadcast256Int64x2
-       OpBroadcast256Uint8x16
-       OpBroadcast256Uint16x8
-       OpBroadcast256Uint32x4
-       OpBroadcast256Uint64x2
-       OpBroadcast512Float32x4
-       OpBroadcast512Float64x2
-       OpBroadcast512Int8x16
-       OpBroadcast512Int16x8
-       OpBroadcast512Int32x4
-       OpBroadcast512Int64x2
-       OpBroadcast512Uint8x16
-       OpBroadcast512Uint16x8
-       OpBroadcast512Uint32x4
-       OpBroadcast512Uint64x2
+       OpBroadcast1To2Float64x2
+       OpBroadcast1To2Int64x2
+       OpBroadcast1To2Uint64x2
+       OpBroadcast1To4Float32x4
+       OpBroadcast1To4Float64x2
+       OpBroadcast1To4Int32x4
+       OpBroadcast1To4Int64x2
+       OpBroadcast1To4Uint32x4
+       OpBroadcast1To4Uint64x2
+       OpBroadcast1To8Float32x4
+       OpBroadcast1To8Float64x2
+       OpBroadcast1To8Int16x8
+       OpBroadcast1To8Int32x4
+       OpBroadcast1To8Int64x2
+       OpBroadcast1To8Uint16x8
+       OpBroadcast1To8Uint32x4
+       OpBroadcast1To8Uint64x2
+       OpBroadcast1To16Float32x4
+       OpBroadcast1To16Int8x16
+       OpBroadcast1To16Int16x8
+       OpBroadcast1To16Int32x4
+       OpBroadcast1To16Uint8x16
+       OpBroadcast1To16Uint16x8
+       OpBroadcast1To16Uint32x4
+       OpBroadcast1To32Int8x16
+       OpBroadcast1To32Int16x8
+       OpBroadcast1To32Uint8x16
+       OpBroadcast1To32Uint16x8
+       OpBroadcast1To64Int8x16
+       OpBroadcast1To64Uint8x16
        OpCeilFloat32x4
        OpCeilFloat32x8
        OpCeilFloat64x2
@@ -89875,152 +89875,152 @@ var opcodeTable = [...]opInfo{
                generic:     true,
        },
        {
-               name:    "Broadcast128Float32x4",
+               name:    "Broadcast1To2Float64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Float64x2",
+               name:    "Broadcast1To2Int64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Int8x16",
+               name:    "Broadcast1To2Uint64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Int16x8",
+               name:    "Broadcast1To4Float32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Int32x4",
+               name:    "Broadcast1To4Float64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Int64x2",
+               name:    "Broadcast1To4Int32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint8x16",
+               name:    "Broadcast1To4Int64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint16x8",
+               name:    "Broadcast1To4Uint32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint32x4",
+               name:    "Broadcast1To4Uint64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast128Uint64x2",
+               name:    "Broadcast1To8Float32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Float32x4",
+               name:    "Broadcast1To8Float64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Float64x2",
+               name:    "Broadcast1To8Int16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int8x16",
+               name:    "Broadcast1To8Int32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int16x8",
+               name:    "Broadcast1To8Int64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int32x4",
+               name:    "Broadcast1To8Uint16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Int64x2",
+               name:    "Broadcast1To8Uint32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Uint8x16",
+               name:    "Broadcast1To8Uint64x2",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Uint16x8",
+               name:    "Broadcast1To16Float32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Uint32x4",
+               name:    "Broadcast1To16Int8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast256Uint64x2",
+               name:    "Broadcast1To16Int16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Float32x4",
+               name:    "Broadcast1To16Int32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Float64x2",
+               name:    "Broadcast1To16Uint8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Int8x16",
+               name:    "Broadcast1To16Uint16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Int16x8",
+               name:    "Broadcast1To16Uint32x4",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Int32x4",
+               name:    "Broadcast1To32Int8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Int64x2",
+               name:    "Broadcast1To32Int16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Uint8x16",
+               name:    "Broadcast1To32Uint8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Uint16x8",
+               name:    "Broadcast1To32Uint16x8",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Uint32x4",
+               name:    "Broadcast1To64Int8x16",
                argLen:  1,
                generic: true,
        },
        {
-               name:    "Broadcast512Uint64x2",
+               name:    "Broadcast1To64Uint8x16",
                argLen:  1,
                generic: true,
        },
index e84bf19c8353197689eefc5ac5dab7189e8b7f1f..fe0005bb058bb58721707aea78f808b636ac5923 100644 (file)
@@ -2479,96 +2479,96 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpBitLen64(v)
        case OpBitLen8:
                return rewriteValueAMD64_OpBitLen8(v)
-       case OpBroadcast128Float32x4:
-               v.Op = OpAMD64VBROADCASTSS128
+       case OpBroadcast1To16Float32x4:
+               v.Op = OpAMD64VBROADCASTSS512
                return true
-       case OpBroadcast128Float64x2:
-               v.Op = OpAMD64VPBROADCASTQ128
+       case OpBroadcast1To16Int16x8:
+               v.Op = OpAMD64VPBROADCASTW256
                return true
-       case OpBroadcast128Int16x8:
-               v.Op = OpAMD64VPBROADCASTW128
+       case OpBroadcast1To16Int32x4:
+               v.Op = OpAMD64VPBROADCASTD512
                return true
-       case OpBroadcast128Int32x4:
-               v.Op = OpAMD64VPBROADCASTD128
+       case OpBroadcast1To16Int8x16:
+               v.Op = OpAMD64VPBROADCASTB128
                return true
-       case OpBroadcast128Int64x2:
-               v.Op = OpAMD64VPBROADCASTQ128
+       case OpBroadcast1To16Uint16x8:
+               v.Op = OpAMD64VPBROADCASTW256
                return true
-       case OpBroadcast128Int8x16:
+       case OpBroadcast1To16Uint32x4:
+               v.Op = OpAMD64VPBROADCASTD512
+               return true
+       case OpBroadcast1To16Uint8x16:
                v.Op = OpAMD64VPBROADCASTB128
                return true
-       case OpBroadcast128Uint16x8:
-               v.Op = OpAMD64VPBROADCASTW128
+       case OpBroadcast1To2Float64x2:
+               v.Op = OpAMD64VPBROADCASTQ128
                return true
-       case OpBroadcast128Uint32x4:
-               v.Op = OpAMD64VPBROADCASTD128
+       case OpBroadcast1To2Int64x2:
+               v.Op = OpAMD64VPBROADCASTQ128
                return true
-       case OpBroadcast128Uint64x2:
+       case OpBroadcast1To2Uint64x2:
                v.Op = OpAMD64VPBROADCASTQ128
                return true
-       case OpBroadcast128Uint8x16:
-               v.Op = OpAMD64VPBROADCASTB128
+       case OpBroadcast1To32Int16x8:
+               v.Op = OpAMD64VPBROADCASTW512
                return true
-       case OpBroadcast256Float32x4:
-               v.Op = OpAMD64VBROADCASTSS256
+       case OpBroadcast1To32Int8x16:
+               v.Op = OpAMD64VPBROADCASTB256
                return true
-       case OpBroadcast256Float64x2:
-               v.Op = OpAMD64VBROADCASTSD256
+       case OpBroadcast1To32Uint16x8:
+               v.Op = OpAMD64VPBROADCASTW512
                return true
-       case OpBroadcast256Int16x8:
-               v.Op = OpAMD64VPBROADCASTW256
+       case OpBroadcast1To32Uint8x16:
+               v.Op = OpAMD64VPBROADCASTB256
                return true
-       case OpBroadcast256Int32x4:
-               v.Op = OpAMD64VPBROADCASTD256
+       case OpBroadcast1To4Float32x4:
+               v.Op = OpAMD64VBROADCASTSS128
                return true
-       case OpBroadcast256Int64x2:
-               v.Op = OpAMD64VPBROADCASTQ256
+       case OpBroadcast1To4Float64x2:
+               v.Op = OpAMD64VBROADCASTSD256
                return true
-       case OpBroadcast256Int8x16:
-               v.Op = OpAMD64VPBROADCASTB256
+       case OpBroadcast1To4Int32x4:
+               v.Op = OpAMD64VPBROADCASTD128
                return true
-       case OpBroadcast256Uint16x8:
-               v.Op = OpAMD64VPBROADCASTW256
+       case OpBroadcast1To4Int64x2:
+               v.Op = OpAMD64VPBROADCASTQ256
                return true
-       case OpBroadcast256Uint32x4:
-               v.Op = OpAMD64VPBROADCASTD256
+       case OpBroadcast1To4Uint32x4:
+               v.Op = OpAMD64VPBROADCASTD128
                return true
-       case OpBroadcast256Uint64x2:
+       case OpBroadcast1To4Uint64x2:
                v.Op = OpAMD64VPBROADCASTQ256
                return true
-       case OpBroadcast256Uint8x16:
-               v.Op = OpAMD64VPBROADCASTB256
+       case OpBroadcast1To64Int8x16:
+               v.Op = OpAMD64VPBROADCASTB512
                return true
-       case OpBroadcast512Float32x4:
-               v.Op = OpAMD64VBROADCASTSS512
+       case OpBroadcast1To64Uint8x16:
+               v.Op = OpAMD64VPBROADCASTB512
+               return true
+       case OpBroadcast1To8Float32x4:
+               v.Op = OpAMD64VBROADCASTSS256
                return true
-       case OpBroadcast512Float64x2:
+       case OpBroadcast1To8Float64x2:
                v.Op = OpAMD64VBROADCASTSD512
                return true
-       case OpBroadcast512Int16x8:
-               v.Op = OpAMD64VPBROADCASTW512
+       case OpBroadcast1To8Int16x8:
+               v.Op = OpAMD64VPBROADCASTW128
                return true
-       case OpBroadcast512Int32x4:
-               v.Op = OpAMD64VPBROADCASTD512
+       case OpBroadcast1To8Int32x4:
+               v.Op = OpAMD64VPBROADCASTD256
                return true
-       case OpBroadcast512Int64x2:
+       case OpBroadcast1To8Int64x2:
                v.Op = OpAMD64VPBROADCASTQ512
                return true
-       case OpBroadcast512Int8x16:
-               v.Op = OpAMD64VPBROADCASTB512
-               return true
-       case OpBroadcast512Uint16x8:
-               v.Op = OpAMD64VPBROADCASTW512
+       case OpBroadcast1To8Uint16x8:
+               v.Op = OpAMD64VPBROADCASTW128
                return true
-       case OpBroadcast512Uint32x4:
-               v.Op = OpAMD64VPBROADCASTD512
+       case OpBroadcast1To8Uint32x4:
+               v.Op = OpAMD64VPBROADCASTD256
                return true
-       case OpBroadcast512Uint64x2:
+       case OpBroadcast1To8Uint64x2:
                v.Op = OpAMD64VPBROADCASTQ512
                return true
-       case OpBroadcast512Uint8x16:
-               v.Op = OpAMD64VPBROADCASTB512
-               return true
        case OpBswap16:
                return rewriteValueAMD64_OpBswap16(v)
        case OpBswap32:
index 4ad0c6032c0cf5223d3cd6547306fbeaf4914625..e50561845b76a5ddd46dc0555a9fe9c13ba1d7ae 100644 (file)
@@ -152,36 +152,36 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast128", opLen1(ssa.OpBroadcast128Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast128", opLen1(ssa.OpBroadcast128Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast128", opLen1(ssa.OpBroadcast128Int8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast128", opLen1(ssa.OpBroadcast128Int16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast128", opLen1(ssa.OpBroadcast128Int32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast128", opLen1(ssa.OpBroadcast128Int64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast128", opLen1(ssa.OpBroadcast128Uint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast128", opLen1(ssa.OpBroadcast128Uint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast128", opLen1(ssa.OpBroadcast128Uint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast128", opLen1(ssa.OpBroadcast128Uint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast256", opLen1(ssa.OpBroadcast256Float32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast256", opLen1(ssa.OpBroadcast256Float64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast256", opLen1(ssa.OpBroadcast256Int8x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast256", opLen1(ssa.OpBroadcast256Int16x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast256", opLen1(ssa.OpBroadcast256Int32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast256", opLen1(ssa.OpBroadcast256Int64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast256", opLen1(ssa.OpBroadcast256Uint8x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast256", opLen1(ssa.OpBroadcast256Uint16x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast256", opLen1(ssa.OpBroadcast256Uint32x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast256", opLen1(ssa.OpBroadcast256Uint64x2, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x4.Broadcast512", opLen1(ssa.OpBroadcast512Float32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x2.Broadcast512", opLen1(ssa.OpBroadcast512Float64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Broadcast512", opLen1(ssa.OpBroadcast512Int8x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Broadcast512", opLen1(ssa.OpBroadcast512Int16x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.Broadcast512", opLen1(ssa.OpBroadcast512Int32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.Broadcast512", opLen1(ssa.OpBroadcast512Int64x2, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Broadcast512", opLen1(ssa.OpBroadcast512Uint8x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Broadcast512", opLen1(ssa.OpBroadcast512Uint16x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Broadcast512", opLen1(ssa.OpBroadcast512Uint32x4, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Broadcast512", opLen1(ssa.OpBroadcast512Uint64x2, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Int64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Broadcast1To2", opLen1(ssa.OpBroadcast1To2Uint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Float64x2, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Int64x2, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Broadcast1To4", opLen1(ssa.OpBroadcast1To4Uint64x2, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float32x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Float64x2, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int32x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Int64x2, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint32x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Broadcast1To8", opLen1(ssa.OpBroadcast1To8Uint64x2, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Float32x4, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int16x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Int32x4, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint16x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Broadcast1To16", opLen1(ssa.OpBroadcast1To16Uint32x4, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int8x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Int16x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint8x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Broadcast1To32", opLen1(ssa.OpBroadcast1To32Uint16x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Int8x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Broadcast1To64", opLen1(ssa.OpBroadcast1To64Uint8x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
index 38bc9374cc2608ddc18b89374550eb6eb8d20be2..3cba01ef95368030004aa48e573ce80a737d9685 100644 (file)
   documentation: !string |-
     // NAME performs an expansion on a vector x whose elements are packed to lower parts.
     // The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order.
-- go: Broadcast128
+- go: Broadcast1To2
   commutative: false
   documentation: !string |-
-    // NAME copies element zero of its (128-bit) input to all elements of
-    // the 128-bit output vector.
-- go: Broadcast256
+    // NAME copies the lowest element of its input to all 2 elements of
+    // the output vector.
+- go: Broadcast1To4
   commutative: false
   documentation: !string |-
-    // NAME copies element zero of its (128-bit) input to all elements of
-    // the 256-bit output vector.
-- go: Broadcast512
+    // NAME copies the lowest element of its input to all 4 elements of
+    // the output vector.
+- go: Broadcast1To8
   commutative: false
   documentation: !string |-
-    // NAME copies element zero of its (128-bit) input to all elements of
-    // the 512-bit output vector.
+    // NAME copies the lowest element of its input to all 8 elements of
+    // the output vector.
+- go: Broadcast1To16
+  commutative: false
+  documentation: !string |-
+    // NAME copies the lowest element of its input to all 16 elements of
+    // the output vector.
+- go: Broadcast1To32
+  commutative: false
+  documentation: !string |-
+    // NAME copies the lowest element of its input to all 32 elements of
+    // the output vector.
+- go: Broadcast1To64
+  commutative: false
+  documentation: !string |-
+    // NAME copies the lowest element of its input to all 64 elements of
+    // the output vector.
 - go: PermuteOrZeroGrouped
   commutative: false
   documentation: !string |- # Detailed documentation will rely on the specific ops.
index e1fd184ed7536d6496722534b7e8dc15d447aaaf..02daa2ea1e2951b25bc70f343b730e93202f5a51 100644 (file)
   out:
   - *any
 
-- go: Broadcast128
-  asm: VPBROADCAST[BWDQ]
+- go: Broadcast1To2
+  asm: VPBROADCASTQ
   in:
   - class: vreg
     bits: 128
-    elemBits: $e
+    elemBits: 64
     base: $b
   out:
   - class: vreg
     bits: 128
-    elemBits: $e
+    elemBits: 64
     base: $b
 
 # weirdly, this one case on AVX2 is memory-operand-only
-- go: Broadcast128
+- go: Broadcast1To2
   asm: VPBROADCASTQ
   in:
   - class: vreg
     base: int
     OverwriteBase: float
 
-- go: Broadcast256
+- go: Broadcast1To4
   asm: VPBROADCAST[BWDQ]
   in:
   - class: vreg
     bits: 128
-    elemBits: $e
     base: $b
   out:
   - class: vreg
-    bits: 256
-    elemBits: $e
+    lanes: 4
     base: $b
 
-- go: Broadcast512
+- go: Broadcast1To8
   asm: VPBROADCAST[BWDQ]
   in:
   - class: vreg
     bits: 128
-    elemBits: $e
     base: $b
   out:
   - class: vreg
-    bits: 512
-    elemBits: $e
+    lanes: 8
     base: $b
 
-- go: Broadcast128
-  asm: VBROADCASTS[SD]
+- go: Broadcast1To16
+  asm: VPBROADCAST[BWDQ]
   in:
   - class: vreg
     bits: 128
-    elemBits: $e
     base: $b
   out:
   - class: vreg
-    bits: 128
-    elemBits: $e
+    lanes: 16
     base: $b
 
-- go: Broadcast256
-  asm: VBROADCASTS[SD]
+- go: Broadcast1To32
+  asm: VPBROADCAST[BWDQ]
   in:
   - class: vreg
     bits: 128
-    elemBits: $e
     base: $b
   out:
   - class: vreg
-    bits: 256
-    elemBits: $e
+    lanes: 32
     base: $b
 
-- go: Broadcast512
-  asm: VBROADCASTS[SD]
+- go: Broadcast1To64
+  asm: VPBROADCASTB
   in:
   - class: vreg
     bits: 128
-    elemBits: $e
     base: $b
   out:
   - class: vreg
-    bits: 512
-    elemBits: $e
+    lanes: 64
     base: $b
 
+- go: Broadcast1To4
+  asm: VBROADCASTS[SD]
+  in:
+  - class: vreg
+    bits: 128
+    base: float
+  out:
+  - class: vreg
+    lanes: 4
+    base: float
+
+- go: Broadcast1To8
+  asm: VBROADCASTS[SD]
+  in:
+  - class: vreg
+    bits: 128
+    base: float
+  out:
+  - class: vreg
+    lanes: 8
+    base: float
+
+- go: Broadcast1To16
+  asm: VBROADCASTS[SD]
+  in:
+  - class: vreg
+    bits: 128
+    base: float
+  out:
+  - class: vreg
+    lanes: 16
+    base: float
+
 # VPSHUFB for 128-bit byte shuffles will be picked with higher priority than VPERMB, given its lower CPU feature requirement. (It's AVX)
 - go: PermuteOrZero
   asm: VPSHUFB
index 8db185e1e0495ae4e35f796d9bf0ada862d92318..45338b765df131bbc7e4c2da3790bd81faf9f39e 100644 (file)
@@ -873,7 +873,7 @@ var broadcastTemplate = templateOf("Broadcast functions", `
 // Emulated, CPU Feature: {{.CPUfeatureBC}}
 func Broadcast{{.VType}}(x {{.Etype}}) {{.VType}} {
        var z {{.As128BitVec }}
-       return z.SetElem(0, x).Broadcast{{.Vwidth}}()
+       return z.SetElem(0, x).Broadcast1To{{.Count}}()
 }
 `)
 
index eba340c79390785b8a5d3e9f38dedefb7af9976d..bb162c4ff9a5443f3355404ba0030818f32b3d5d 100644 (file)
@@ -805,191 +805,197 @@ func (x Uint16x16) Average(y Uint16x16) Uint16x16
 // Asm: VPAVGW, CPU Feature: AVX512
 func (x Uint16x32) Average(y Uint16x32) Uint16x32
 
-/* Broadcast128 */
+/* Broadcast1To2 */
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
 //
-// Asm: VBROADCASTSS, CPU Feature: AVX2
-func (x Float32x4) Broadcast128() Float32x4
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Float64x2) Broadcast1To2() Float64x2
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Float64x2) Broadcast128() Float64x2
+func (x Int64x2) Broadcast1To2() Int64x2
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To2 copies the lowest element of its input to all 2 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Int8x16) Broadcast128() Int8x16
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Uint64x2) Broadcast1To2() Uint64x2
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
-//
-// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Int16x8) Broadcast128() Int16x8
+/* Broadcast1To4 */
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Int32x4) Broadcast128() Int32x4
+// Asm: VBROADCASTSS, CPU Feature: AVX2
+func (x Float32x4) Broadcast1To4() Float32x4
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Int64x2) Broadcast128() Int64x2
+// Asm: VBROADCASTSD, CPU Feature: AVX2
+func (x Float64x2) Broadcast1To4() Float64x4
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Uint8x16) Broadcast128() Uint8x16
+// Asm: VPBROADCASTD, CPU Feature: AVX2
+func (x Int32x4) Broadcast1To4() Int32x4
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Uint16x8) Broadcast128() Uint16x8
+// Asm: VPBROADCASTQ, CPU Feature: AVX2
+func (x Int64x2) Broadcast1To4() Int64x4
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Uint32x4) Broadcast128() Uint32x4
+func (x Uint32x4) Broadcast1To4() Uint32x4
 
-// Broadcast128 copies element zero of its (128-bit) input to all elements of
-// the 128-bit output vector.
+// Broadcast1To4 copies the lowest element of its input to all 4 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Uint64x2) Broadcast128() Uint64x2
+func (x Uint64x2) Broadcast1To4() Uint64x4
 
-/* Broadcast256 */
+/* Broadcast1To8 */
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
 // Asm: VBROADCASTSS, CPU Feature: AVX2
-func (x Float32x4) Broadcast256() Float32x8
-
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// Asm: VBROADCASTSD, CPU Feature: AVX2
-func (x Float64x2) Broadcast256() Float64x4
+func (x Float32x4) Broadcast1To8() Float32x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Int8x16) Broadcast256() Int8x32
+// Asm: VBROADCASTSD, CPU Feature: AVX512
+func (x Float64x2) Broadcast1To8() Float64x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Int16x8) Broadcast256() Int16x16
+func (x Int16x8) Broadcast1To8() Int16x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Int32x4) Broadcast256() Int32x8
+func (x Int32x4) Broadcast1To8() Int32x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Int64x2) Broadcast256() Int64x4
-
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
-//
-// Asm: VPBROADCASTB, CPU Feature: AVX2
-func (x Uint8x16) Broadcast256() Uint8x32
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Int64x2) Broadcast1To8() Int64x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTW, CPU Feature: AVX2
-func (x Uint16x8) Broadcast256() Uint16x16
+func (x Uint16x8) Broadcast1To8() Uint16x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTD, CPU Feature: AVX2
-func (x Uint32x4) Broadcast256() Uint32x8
+func (x Uint32x4) Broadcast1To8() Uint32x8
 
-// Broadcast256 copies element zero of its (128-bit) input to all elements of
-// the 256-bit output vector.
+// Broadcast1To8 copies the lowest element of its input to all 8 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTQ, CPU Feature: AVX2
-func (x Uint64x2) Broadcast256() Uint64x4
+// Asm: VPBROADCASTQ, CPU Feature: AVX512
+func (x Uint64x2) Broadcast1To8() Uint64x8
 
-/* Broadcast512 */
+/* Broadcast1To16 */
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
 //
 // Asm: VBROADCASTSS, CPU Feature: AVX512
-func (x Float32x4) Broadcast512() Float32x16
+func (x Float32x4) Broadcast1To16() Float32x16
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
 //
-// Asm: VBROADCASTSD, CPU Feature: AVX512
-func (x Float64x2) Broadcast512() Float64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast1To16() Int8x16
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Int8x16) Broadcast512() Int8x64
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Int16x8) Broadcast1To16() Int16x16
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Int16x8) Broadcast512() Int16x32
+// Asm: VPBROADCASTD, CPU Feature: AVX512
+func (x Int32x4) Broadcast1To16() Int32x16
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast1To16() Uint8x16
+
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTW, CPU Feature: AVX2
+func (x Uint16x8) Broadcast1To16() Uint16x16
+
+// Broadcast1To16 copies the lowest element of its input to all 16 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Int32x4) Broadcast512() Int32x16
+func (x Uint32x4) Broadcast1To16() Uint32x16
+
+/* Broadcast1To32 */
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Int64x2) Broadcast512() Int64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Int8x16) Broadcast1To32() Int8x32
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTB, CPU Feature: AVX512
-func (x Uint8x16) Broadcast512() Uint8x64
+// Asm: VPBROADCASTW, CPU Feature: AVX512
+func (x Int16x8) Broadcast1To32() Int16x32
+
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
+//
+// Asm: VPBROADCASTB, CPU Feature: AVX2
+func (x Uint8x16) Broadcast1To32() Uint8x32
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To32 copies the lowest element of its input to all 32 elements of
+// the output vector.
 //
 // Asm: VPBROADCASTW, CPU Feature: AVX512
-func (x Uint16x8) Broadcast512() Uint16x32
+func (x Uint16x8) Broadcast1To32() Uint16x32
+
+/* Broadcast1To64 */
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To64 copies the lowest element of its input to all 64 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTD, CPU Feature: AVX512
-func (x Uint32x4) Broadcast512() Uint32x16
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Int8x16) Broadcast1To64() Int8x64
 
-// Broadcast512 copies element zero of its (128-bit) input to all elements of
-// the 512-bit output vector.
+// Broadcast1To64 copies the lowest element of its input to all 64 elements of
+// the output vector.
 //
-// Asm: VPBROADCASTQ, CPU Feature: AVX512
-func (x Uint64x2) Broadcast512() Uint64x8
+// Asm: VPBROADCASTB, CPU Feature: AVX512
+func (x Uint8x16) Broadcast1To64() Uint8x64
 
 /* Ceil */
 
index 647001accedd523740fd6dea68c3e065c427c462..c250dc243651305c2feae13341ee69cb7cf7ff11 100644 (file)
@@ -10,7 +10,7 @@ package archsimd
 // Emulated, CPU Feature: AVX2
 func BroadcastInt8x16(x int8) Int8x16 {
        var z Int8x16
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastInt16x8 returns a vector with the input
@@ -19,7 +19,7 @@ func BroadcastInt8x16(x int8) Int8x16 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt16x8(x int16) Int16x8 {
        var z Int16x8
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastInt32x4 returns a vector with the input
@@ -28,7 +28,7 @@ func BroadcastInt16x8(x int16) Int16x8 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt32x4(x int32) Int32x4 {
        var z Int32x4
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To4()
 }
 
 // BroadcastInt64x2 returns a vector with the input
@@ -37,7 +37,7 @@ func BroadcastInt32x4(x int32) Int32x4 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt64x2(x int64) Int64x2 {
        var z Int64x2
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To2()
 }
 
 // BroadcastUint8x16 returns a vector with the input
@@ -46,7 +46,7 @@ func BroadcastInt64x2(x int64) Int64x2 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint8x16(x uint8) Uint8x16 {
        var z Uint8x16
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastUint16x8 returns a vector with the input
@@ -55,7 +55,7 @@ func BroadcastUint8x16(x uint8) Uint8x16 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint16x8(x uint16) Uint16x8 {
        var z Uint16x8
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastUint32x4 returns a vector with the input
@@ -64,7 +64,7 @@ func BroadcastUint16x8(x uint16) Uint16x8 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint32x4(x uint32) Uint32x4 {
        var z Uint32x4
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To4()
 }
 
 // BroadcastUint64x2 returns a vector with the input
@@ -73,7 +73,7 @@ func BroadcastUint32x4(x uint32) Uint32x4 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint64x2(x uint64) Uint64x2 {
        var z Uint64x2
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To2()
 }
 
 // BroadcastFloat32x4 returns a vector with the input
@@ -82,7 +82,7 @@ func BroadcastUint64x2(x uint64) Uint64x2 {
 // Emulated, CPU Feature: AVX2
 func BroadcastFloat32x4(x float32) Float32x4 {
        var z Float32x4
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To4()
 }
 
 // BroadcastFloat64x2 returns a vector with the input
@@ -91,7 +91,7 @@ func BroadcastFloat32x4(x float32) Float32x4 {
 // Emulated, CPU Feature: AVX2
 func BroadcastFloat64x2(x float64) Float64x2 {
        var z Float64x2
-       return z.SetElem(0, x).Broadcast128()
+       return z.SetElem(0, x).Broadcast1To2()
 }
 
 // BroadcastInt8x32 returns a vector with the input
@@ -100,7 +100,7 @@ func BroadcastFloat64x2(x float64) Float64x2 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt8x32(x int8) Int8x32 {
        var z Int8x16
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To32()
 }
 
 // BroadcastInt16x16 returns a vector with the input
@@ -109,7 +109,7 @@ func BroadcastInt8x32(x int8) Int8x32 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt16x16(x int16) Int16x16 {
        var z Int16x8
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastInt32x8 returns a vector with the input
@@ -118,7 +118,7 @@ func BroadcastInt16x16(x int16) Int16x16 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt32x8(x int32) Int32x8 {
        var z Int32x4
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastInt64x4 returns a vector with the input
@@ -127,7 +127,7 @@ func BroadcastInt32x8(x int32) Int32x8 {
 // Emulated, CPU Feature: AVX2
 func BroadcastInt64x4(x int64) Int64x4 {
        var z Int64x2
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To4()
 }
 
 // BroadcastUint8x32 returns a vector with the input
@@ -136,7 +136,7 @@ func BroadcastInt64x4(x int64) Int64x4 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint8x32(x uint8) Uint8x32 {
        var z Uint8x16
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To32()
 }
 
 // BroadcastUint16x16 returns a vector with the input
@@ -145,7 +145,7 @@ func BroadcastUint8x32(x uint8) Uint8x32 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint16x16(x uint16) Uint16x16 {
        var z Uint16x8
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastUint32x8 returns a vector with the input
@@ -154,7 +154,7 @@ func BroadcastUint16x16(x uint16) Uint16x16 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint32x8(x uint32) Uint32x8 {
        var z Uint32x4
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastUint64x4 returns a vector with the input
@@ -163,7 +163,7 @@ func BroadcastUint32x8(x uint32) Uint32x8 {
 // Emulated, CPU Feature: AVX2
 func BroadcastUint64x4(x uint64) Uint64x4 {
        var z Uint64x2
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To4()
 }
 
 // BroadcastFloat32x8 returns a vector with the input
@@ -172,7 +172,7 @@ func BroadcastUint64x4(x uint64) Uint64x4 {
 // Emulated, CPU Feature: AVX2
 func BroadcastFloat32x8(x float32) Float32x8 {
        var z Float32x4
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastFloat64x4 returns a vector with the input
@@ -181,7 +181,7 @@ func BroadcastFloat32x8(x float32) Float32x8 {
 // Emulated, CPU Feature: AVX2
 func BroadcastFloat64x4(x float64) Float64x4 {
        var z Float64x2
-       return z.SetElem(0, x).Broadcast256()
+       return z.SetElem(0, x).Broadcast1To4()
 }
 
 // BroadcastInt8x64 returns a vector with the input
@@ -190,7 +190,7 @@ func BroadcastFloat64x4(x float64) Float64x4 {
 // Emulated, CPU Feature: AVX512BW
 func BroadcastInt8x64(x int8) Int8x64 {
        var z Int8x16
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To64()
 }
 
 // BroadcastInt16x32 returns a vector with the input
@@ -199,7 +199,7 @@ func BroadcastInt8x64(x int8) Int8x64 {
 // Emulated, CPU Feature: AVX512BW
 func BroadcastInt16x32(x int16) Int16x32 {
        var z Int16x8
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To32()
 }
 
 // BroadcastInt32x16 returns a vector with the input
@@ -208,7 +208,7 @@ func BroadcastInt16x32(x int16) Int16x32 {
 // Emulated, CPU Feature: AVX512F
 func BroadcastInt32x16(x int32) Int32x16 {
        var z Int32x4
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastInt64x8 returns a vector with the input
@@ -217,7 +217,7 @@ func BroadcastInt32x16(x int32) Int32x16 {
 // Emulated, CPU Feature: AVX512F
 func BroadcastInt64x8(x int64) Int64x8 {
        var z Int64x2
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastUint8x64 returns a vector with the input
@@ -226,7 +226,7 @@ func BroadcastInt64x8(x int64) Int64x8 {
 // Emulated, CPU Feature: AVX512BW
 func BroadcastUint8x64(x uint8) Uint8x64 {
        var z Uint8x16
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To64()
 }
 
 // BroadcastUint16x32 returns a vector with the input
@@ -235,7 +235,7 @@ func BroadcastUint8x64(x uint8) Uint8x64 {
 // Emulated, CPU Feature: AVX512BW
 func BroadcastUint16x32(x uint16) Uint16x32 {
        var z Uint16x8
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To32()
 }
 
 // BroadcastUint32x16 returns a vector with the input
@@ -244,7 +244,7 @@ func BroadcastUint16x32(x uint16) Uint16x32 {
 // Emulated, CPU Feature: AVX512F
 func BroadcastUint32x16(x uint32) Uint32x16 {
        var z Uint32x4
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastUint64x8 returns a vector with the input
@@ -253,7 +253,7 @@ func BroadcastUint32x16(x uint32) Uint32x16 {
 // Emulated, CPU Feature: AVX512F
 func BroadcastUint64x8(x uint64) Uint64x8 {
        var z Uint64x2
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // BroadcastFloat32x16 returns a vector with the input
@@ -262,7 +262,7 @@ func BroadcastUint64x8(x uint64) Uint64x8 {
 // Emulated, CPU Feature: AVX512F
 func BroadcastFloat32x16(x float32) Float32x16 {
        var z Float32x4
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To16()
 }
 
 // BroadcastFloat64x8 returns a vector with the input
@@ -271,7 +271,7 @@ func BroadcastFloat32x16(x float32) Float32x16 {
 // Emulated, CPU Feature: AVX512F
 func BroadcastFloat64x8(x float64) Float64x8 {
        var z Float64x2
-       return z.SetElem(0, x).Broadcast512()
+       return z.SetElem(0, x).Broadcast1To8()
 }
 
 // ToMask converts from Int8x16 to Mask8x16, mask element is set to true when the corresponding vector element is non-zero.