]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: add definitions for VPTERNLOG[DQ]
author: David Chase <drchase@google.com>
Fri, 3 Oct 2025 18:44:59 +0000 (14:44 -0400)
committer: David Chase <drchase@google.com>
Wed, 22 Oct 2025 18:38:57 +0000 (11:38 -0700)
This includes a non-public intrinsic for testing,
and a test.  Optimizations using this instruction
will follow in another CL.

Change-Id: I7f7a93212249a16a30bd1379c717f8a7f9915daf
Reviewed-on: https://go-review.googlesource.com/c/go/+/708995
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
13 files changed:
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/amd64/ssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/_gen/simdgen/gen_simdssa.go
src/simd/_gen/simdgen/ops/BitwiseLogic/categories.yaml
src/simd/_gen/simdgen/ops/BitwiseLogic/go.yaml
src/simd/ops_amd64.go
src/simd/pkginternal_test.go

index fe2ae019acd0a4ac6d28255f8ddeda1a07c78be8..86d44c12452cf5b26a936624ade18542d6227dad 100644 (file)
@@ -1939,6 +1939,22 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPSHRDQMasked512load:
                p = simdV2kvloadImm8(s, v)
 
+       case ssa.OpAMD64VPTERNLOGD128,
+               ssa.OpAMD64VPTERNLOGD256,
+               ssa.OpAMD64VPTERNLOGD512,
+               ssa.OpAMD64VPTERNLOGQ128,
+               ssa.OpAMD64VPTERNLOGQ256,
+               ssa.OpAMD64VPTERNLOGQ512:
+               p = simdV31ResultInArg0Imm8(s, v)
+
+       case ssa.OpAMD64VPTERNLOGD128load,
+               ssa.OpAMD64VPTERNLOGD256load,
+               ssa.OpAMD64VPTERNLOGD512load,
+               ssa.OpAMD64VPTERNLOGQ128load,
+               ssa.OpAMD64VPTERNLOGQ256load,
+               ssa.OpAMD64VPTERNLOGQ512load:
+               p = simdV31loadResultInArg0Imm8(s, v)
+
        default:
                // Unknown reg shape
                return false
index 25fa7b695a2f3d188a1a4e5a5fb17e578890680a..b3f8191609f10a668e83f8594d9bea48fbd37b9d 100644 (file)
@@ -2095,6 +2095,37 @@ func simdV31ResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
        return p
 }
 
+// simdV31ResultInArg0Imm8 builds the Prog for a three-vector-input op that
+// carries an 8-bit immediate in its aux field (the VPTERNLOG[DQ] register forms
+// are dispatched here). The operands are emitted as: immediate, register of
+// Args[2], register of Args[1], with the register assigned to v as destination.
+// Args[0] is never named explicitly; the ops routed here are declared
+// resultInArg0, so it is presumably already in v's register (TODO confirm
+// against the register allocator's handling of resultInArg0).
+func simdV31ResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
+       p := s.Prog(v.Op.Asm())
+       // The immediate comes first, as a constant operand.
+       p.From.Offset = int64(v.AuxUInt8())
+       p.From.Type = obj.TYPE_CONST
+
+       // The remaining sources are appended in reverse argument order:
+       // Args[2] first, then Args[1].
+       p.AddRestSourceReg(simdReg(v.Args[2]))
+       p.AddRestSourceReg(simdReg(v.Args[1]))
+       // p.AddRestSourceReg(x86.REG_K0)
+       // The destination is the register assigned to the result value.
+       p.To.Type = obj.TYPE_REG
+       p.To.Reg = simdReg(v)
+       return p
+}
+
+// v31loadResultInArg0Imm8
+// Example instruction:
+// for (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
+func simdV31loadResultInArg0Imm8(s *ssagen.State, v *ssa.Value) *obj.Prog {
+       sc := v.AuxValAndOff()
+       p := s.Prog(v.Op.Asm())
+
+       p.From.Type = obj.TYPE_CONST
+       p.From.Offset = sc.Val64()
+
+       m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[2].Reg()}
+       ssagen.AddAux2(&m, v, sc.Off64())
+       p.AddRestSource(m)
+
+       p.AddRestSourceReg(simdReg(v.Args[1]))
+       return p
+}
+
 // Example instruction: VFMADD213PD Z2, Z1, K1, Z0
 func simdV3kvResultInArg0(s *ssagen.State, v *ssa.Value) *obj.Prog {
        p := s.Prog(v.Op.Asm())
index 9e34d4b8816ca0e15c95ebbb7c5e8b7355de6be6..2cda679f2dd060330507ade398c7c2aafc2b264a 100644 (file)
 (moveMaskedUint16x32 x mask) => (VMOVDQU16Masked512 x (VPMOVVec16x32ToM <types.TypeMask> mask))
 (moveMaskedUint32x16 x mask) => (VMOVDQU32Masked512 x (VPMOVVec32x16ToM <types.TypeMask> mask))
 (moveMaskedUint64x8 x mask) => (VMOVDQU64Masked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
+(ternInt32x4 ...) => (VPTERNLOGD128 ...)
+(ternInt32x8 ...) => (VPTERNLOGD256 ...)
+(ternInt32x16 ...) => (VPTERNLOGD512 ...)
+(ternInt64x2 ...) => (VPTERNLOGQ128 ...)
+(ternInt64x4 ...) => (VPTERNLOGQ256 ...)
+(ternInt64x8 ...) => (VPTERNLOGQ512 ...)
+(ternUint32x4 ...) => (VPTERNLOGD128 ...)
+(ternUint32x8 ...) => (VPTERNLOGD256 ...)
+(ternUint32x16 ...) => (VPTERNLOGD512 ...)
+(ternUint64x2 ...) => (VPTERNLOGQ128 ...)
+(ternUint64x4 ...) => (VPTERNLOGQ256 ...)
+(ternUint64x8 ...) => (VPTERNLOGQ512 ...)
 (VMOVDQU8Masked512 (VPABSB512 x) mask) => (VPABSBMasked512 x mask)
 (VMOVDQU16Masked512 (VPABSW512 x) mask) => (VPABSWMasked512 x mask)
 (VMOVDQU32Masked512 (VPABSD512 x) mask) => (VPABSDMasked512 x mask)
 (VPSRAQMasked128const [c]  l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)]  ptr mask mem)
 (VPSRAQMasked256const [c]  l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)]  ptr mask mem)
 (VPSRAQMasked512const [c]  l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)]  ptr mask mem)
+(VPTERNLOGD128 [c]  x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
+(VPTERNLOGD256 [c]  x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
+(VPTERNLOGD512 [c]  x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
+(VPTERNLOGQ128 [c]  x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
+(VPTERNLOGQ256 [c]  x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
+(VPTERNLOGQ512 [c]  x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)]  x y ptr mem)
index 2cdf80c1ba1995ea24c864371abb9ac881cfe774..add281c6b92a49d0ebafd097ae67ed5773155db6 100644 (file)
@@ -1322,6 +1322,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VPTERNLOGD128", argLength: 3, reg: w31, asm: "VPTERNLOGD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPTERNLOGD256", argLength: 3, reg: w31, asm: "VPTERNLOGD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
+               {name: "VPTERNLOGD512", argLength: 3, reg: w31, asm: "VPTERNLOGD", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
+               {name: "VPTERNLOGQ128", argLength: 3, reg: w31, asm: "VPTERNLOGQ", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
+               {name: "VPTERNLOGQ256", argLength: 3, reg: w31, asm: "VPTERNLOGQ", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: true},
+               {name: "VPTERNLOGQ512", argLength: 3, reg: w31, asm: "VPTERNLOGQ", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: true},
                {name: "VPABSD512load", argLength: 2, reg: w11load, asm: "VPABSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: false},
                {name: "VPABSQ128load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: false},
                {name: "VPABSQ256load", argLength: 2, reg: w11load, asm: "VPABSQ", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: false},
@@ -1870,5 +1876,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPSRAQMasked128constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
                {name: "VPSRAQMasked256constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
                {name: "VPSRAQMasked512constload", argLength: 3, reg: wkwload, asm: "VPSRAQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: false},
+               {name: "VPTERNLOGD128load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
+               {name: "VPTERNLOGD256load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
+               {name: "VPTERNLOGD512load", argLength: 4, reg: w31load, asm: "VPTERNLOGD", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
+               {name: "VPTERNLOGQ128load", argLength: 4, reg: w31load, asm: "VPTERNLOGQ", commutative: false, typ: "Vec128", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
+               {name: "VPTERNLOGQ256load", argLength: 4, reg: w31load, asm: "VPTERNLOGQ", commutative: false, typ: "Vec256", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
+               {name: "VPTERNLOGQ512load", argLength: 4, reg: w31load, asm: "VPTERNLOGQ", commutative: false, typ: "Vec512", aux: "SymValAndOff", symEffect: "Read", resultInArg0: true},
        }
 }
index f5eb9075d710b0ff13e6dede5f4ba357f1cb4ba7..546f6c0bc5831ab10f17d335753cae54ace729a1 100644 (file)
@@ -1288,5 +1288,17 @@ func simdGenericOps() []opData {
                {name: "concatSelectedConstantInt64x2", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "concatSelectedConstantUint32x4", argLength: 2, commutative: false, aux: "UInt8"},
                {name: "concatSelectedConstantUint64x2", argLength: 2, commutative: false, aux: "UInt8"},
+               {name: "ternInt32x4", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternInt32x8", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternInt32x16", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternInt64x2", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternInt64x4", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternInt64x8", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternUint32x4", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternUint32x8", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternUint32x16", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternUint64x2", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternUint64x4", argLength: 3, commutative: false, aux: "UInt8"},
+               {name: "ternUint64x8", argLength: 3, commutative: false, aux: "UInt8"},
        }
 }
index 6dd7082e100651e3096c7dc2cf5c04f8ea0a857f..91873744602e9302c956e536abeea1dfcd8ea3d4 100644 (file)
@@ -2562,6 +2562,12 @@ const (
        OpAMD64VPSRAQMasked128const
        OpAMD64VPSRAQMasked256const
        OpAMD64VPSRAQMasked512const
+       OpAMD64VPTERNLOGD128
+       OpAMD64VPTERNLOGD256
+       OpAMD64VPTERNLOGD512
+       OpAMD64VPTERNLOGQ128
+       OpAMD64VPTERNLOGQ256
+       OpAMD64VPTERNLOGQ512
        OpAMD64VPABSD512load
        OpAMD64VPABSQ128load
        OpAMD64VPABSQ256load
@@ -3110,6 +3116,12 @@ const (
        OpAMD64VPSRAQMasked128constload
        OpAMD64VPSRAQMasked256constload
        OpAMD64VPSRAQMasked512constload
+       OpAMD64VPTERNLOGD128load
+       OpAMD64VPTERNLOGD256load
+       OpAMD64VPTERNLOGD512load
+       OpAMD64VPTERNLOGQ128load
+       OpAMD64VPTERNLOGQ256load
+       OpAMD64VPTERNLOGQ512load
 
        OpARMADD
        OpARMADDconst
@@ -6669,6 +6681,18 @@ const (
        OpconcatSelectedConstantInt64x2
        OpconcatSelectedConstantUint32x4
        OpconcatSelectedConstantUint64x2
+       OpternInt32x4
+       OpternInt32x8
+       OpternInt32x16
+       OpternInt64x2
+       OpternInt64x4
+       OpternInt64x8
+       OpternUint32x4
+       OpternUint32x8
+       OpternUint32x16
+       OpternUint64x2
+       OpternUint64x4
+       OpternUint64x8
 )
 
 var opcodeTable = [...]opInfo{
@@ -39366,6 +39390,108 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "VPTERNLOGD128",
+               auxType:      auxUInt8,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPTERNLOGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGD256",
+               auxType:      auxUInt8,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPTERNLOGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGD512",
+               auxType:      auxUInt8,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPTERNLOGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGQ128",
+               auxType:      auxUInt8,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPTERNLOGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGQ256",
+               auxType:      auxUInt8,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPTERNLOGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGQ512",
+               auxType:      auxUInt8,
+               argLen:       3,
+               resultInArg0: true,
+               asm:          x86.AVPTERNLOGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:      "VPABSD512load",
                auxType:   auxSymOff,
@@ -48504,6 +48630,114 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:         "VPTERNLOGD128load",
+               auxType:      auxSymValAndOff,
+               argLen:       4,
+               resultInArg0: true,
+               symEffect:    SymRead,
+               asm:          x86.AVPTERNLOGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGD256load",
+               auxType:      auxSymValAndOff,
+               argLen:       4,
+               resultInArg0: true,
+               symEffect:    SymRead,
+               asm:          x86.AVPTERNLOGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGD512load",
+               auxType:      auxSymValAndOff,
+               argLen:       4,
+               resultInArg0: true,
+               symEffect:    SymRead,
+               asm:          x86.AVPTERNLOGD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGQ128load",
+               auxType:      auxSymValAndOff,
+               argLen:       4,
+               resultInArg0: true,
+               symEffect:    SymRead,
+               asm:          x86.AVPTERNLOGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGQ256load",
+               auxType:      auxSymValAndOff,
+               argLen:       4,
+               resultInArg0: true,
+               symEffect:    SymRead,
+               asm:          x86.AVPTERNLOGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:         "VPTERNLOGQ512load",
+               auxType:      auxSymValAndOff,
+               argLen:       4,
+               resultInArg0: true,
+               symEffect:    SymRead,
+               asm:          x86.AVPTERNLOGQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
 
        {
                name:        "ADD",
@@ -82840,6 +83074,78 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "ternInt32x4",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternInt32x8",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternInt32x16",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternInt64x2",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternInt64x4",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternInt64x8",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternUint32x4",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternUint32x8",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternUint32x16",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternUint64x2",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternUint64x4",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
+       {
+               name:    "ternUint64x8",
+               auxType: auxUInt8,
+               argLen:  3,
+               generic: true,
+       },
 }
 
 func (o Op) Asm() obj.As          { return opcodeTable[o].asm }
index 42814029144a6756702a88414bb050bbec26be66..89b6d1600b33d22d5a7781ee14d77a8316238d01 100644 (file)
@@ -1609,6 +1609,18 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPSUBQMasked256(v)
        case OpAMD64VPSUBQMasked512:
                return rewriteValueAMD64_OpAMD64VPSUBQMasked512(v)
+       case OpAMD64VPTERNLOGD128:
+               return rewriteValueAMD64_OpAMD64VPTERNLOGD128(v)
+       case OpAMD64VPTERNLOGD256:
+               return rewriteValueAMD64_OpAMD64VPTERNLOGD256(v)
+       case OpAMD64VPTERNLOGD512:
+               return rewriteValueAMD64_OpAMD64VPTERNLOGD512(v)
+       case OpAMD64VPTERNLOGQ128:
+               return rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v)
+       case OpAMD64VPTERNLOGQ256:
+               return rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v)
+       case OpAMD64VPTERNLOGQ512:
+               return rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v)
        case OpAMD64VPUNPCKHDQ512:
                return rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v)
        case OpAMD64VPUNPCKHQDQ512:
@@ -6061,6 +6073,42 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpmoveMaskedUint64x8(v)
        case OpmoveMaskedUint8x64:
                return rewriteValueAMD64_OpmoveMaskedUint8x64(v)
+       case OpternInt32x16:
+               v.Op = OpAMD64VPTERNLOGD512
+               return true
+       case OpternInt32x4:
+               v.Op = OpAMD64VPTERNLOGD128
+               return true
+       case OpternInt32x8:
+               v.Op = OpAMD64VPTERNLOGD256
+               return true
+       case OpternInt64x2:
+               v.Op = OpAMD64VPTERNLOGQ128
+               return true
+       case OpternInt64x4:
+               v.Op = OpAMD64VPTERNLOGQ256
+               return true
+       case OpternInt64x8:
+               v.Op = OpAMD64VPTERNLOGQ512
+               return true
+       case OpternUint32x16:
+               v.Op = OpAMD64VPTERNLOGD512
+               return true
+       case OpternUint32x4:
+               v.Op = OpAMD64VPTERNLOGD128
+               return true
+       case OpternUint32x8:
+               v.Op = OpAMD64VPTERNLOGD256
+               return true
+       case OpternUint64x2:
+               v.Op = OpAMD64VPTERNLOGQ128
+               return true
+       case OpternUint64x4:
+               v.Op = OpAMD64VPTERNLOGQ256
+               return true
+       case OpternUint64x8:
+               v.Op = OpAMD64VPTERNLOGQ512
+               return true
        }
        return false
 }
@@ -45655,6 +45703,186 @@ func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpAMD64VPTERNLOGD128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPTERNLOGD128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (VPTERNLOGD128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+       for {
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               y := v_1
+               l := v_2
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPTERNLOGD128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, y, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPTERNLOGD256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPTERNLOGD256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (VPTERNLOGD256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+       for {
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               y := v_1
+               l := v_2
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPTERNLOGD256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, y, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPTERNLOGD512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPTERNLOGD512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (VPTERNLOGD512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+       for {
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               y := v_1
+               l := v_2
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPTERNLOGD512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, y, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPTERNLOGQ128(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPTERNLOGQ128 [c] x y l:(VMOVDQUload128 {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (VPTERNLOGQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+       for {
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               y := v_1
+               l := v_2
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPTERNLOGQ128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, y, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPTERNLOGQ256(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPTERNLOGQ256 [c] x y l:(VMOVDQUload256 {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (VPTERNLOGQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+       for {
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               y := v_1
+               l := v_2
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPTERNLOGQ256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, y, ptr, mem)
+               return true
+       }
+       return false
+}
+func rewriteValueAMD64_OpAMD64VPTERNLOGQ512(v *Value) bool {
+       v_2 := v.Args[2]
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (VPTERNLOGQ512 [c] x y l:(VMOVDQUload512 {sym} [off] ptr mem))
+       // cond: canMergeLoad(v, l) && clobber(l)
+       // result: (VPTERNLOGQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x y ptr mem)
+       for {
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               y := v_1
+               l := v_2
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
+               }
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPTERNLOGQ512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, y, ptr, mem)
+               return true
+       }
+       return false
+}
 func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index d4fb524b2471e8ac4ead85bd0affb78188030167..5b6b25fb70d9a5c870150890b8cb6fcac000114d 100644 (file)
@@ -1296,6 +1296,18 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x32.moveMasked", opLen2(ssa.OpmoveMaskedUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x16.moveMasked", opLen2(ssa.OpmoveMaskedUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.moveMasked", opLen2(ssa.OpmoveMaskedUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.tern", opLen3Imm8(ssa.OpternInt32x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int32x8.tern", opLen3Imm8(ssa.OpternInt32x8, types.TypeVec256, 0), sys.AMD64)
+       addF(simdPackage, "Int32x16.tern", opLen3Imm8(ssa.OpternInt32x16, types.TypeVec512, 0), sys.AMD64)
+       addF(simdPackage, "Int64x2.tern", opLen3Imm8(ssa.OpternInt64x2, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int64x4.tern", opLen3Imm8(ssa.OpternInt64x4, types.TypeVec256, 0), sys.AMD64)
+       addF(simdPackage, "Int64x8.tern", opLen3Imm8(ssa.OpternInt64x8, types.TypeVec512, 0), sys.AMD64)
+       addF(simdPackage, "Uint32x4.tern", opLen3Imm8(ssa.OpternUint32x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint32x8.tern", opLen3Imm8(ssa.OpternUint32x8, types.TypeVec256, 0), sys.AMD64)
+       addF(simdPackage, "Uint32x16.tern", opLen3Imm8(ssa.OpternUint32x16, types.TypeVec512, 0), sys.AMD64)
+       addF(simdPackage, "Uint64x2.tern", opLen3Imm8(ssa.OpternUint64x2, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint64x4.tern", opLen3Imm8(ssa.OpternUint64x4, types.TypeVec256, 0), sys.AMD64)
+       addF(simdPackage, "Uint64x8.tern", opLen3Imm8(ssa.OpternUint64x8, types.TypeVec512, 0), sys.AMD64)
        addF(simdPackage, "Float32x4.AsFloat64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Float32x4.AsInt8x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Float32x4.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
index b48f5ce831e621a31c974b960b5dbfd65ed0421d..c1ce58454934a9fd356162452ae4e27fce6c9d77 100644 (file)
@@ -94,6 +94,8 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
                "v2kloadImm8",
                "v2kkloadImm8",
                "v2kvloadImm8",
+               "v31ResultInArg0Imm8",
+               "v31loadResultInArg0Imm8",
        }
        regInfoSet := map[string][]string{}
        for _, key := range regInfoKeys {
index 3142d1910d364d6eca1a1396977c54b6192ba75d..197e994b54c6732a82dc19b6bcd3a71d680832d9 100644 (file)
   commutative: true
   documentation: !string |-
     // NAME performs a bitwise XOR operation between two vectors.
+- go: tern
+  commutative: false
+  documentation: !string |-
+    // NAME performs a logical operation on three vectors based on the 8-bit truth table.
+    // Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
 
 # We also have PTEST and VPTERNLOG, those should be hidden from the users
 # and only appear in rewrite rules.
index ab344438fb27dc4bbeb3f27f1c75f37cfab3ac6d..ad46115462f14faddd161ae29b8ebc3c86dfac25 100644 (file)
   asm: "VPXORD" # Fill in the gap, Or is missing for Uint16x32 and Int16x32
   inVariant: []
   in: *twoI16x32
-  out: *oneI16x32
\ No newline at end of file
+  out: *oneI16x32
+
+- go: tern
+  asm: "VPTERNLOGD|VPTERNLOGQ"
+  in:
+  - &tern_op
+    go: $t
+  - *tern_op
+  - *tern_op
+  - class: immediate
+    immOffset: 0
+    name: table
+  inVariant: []
+  out:
+  - *tern_op
index 2331622361725ae971d5075537b09c7df104419d..49c387aea9c11628a534a91bf9b34b378f228490 100644 (file)
@@ -7872,6 +7872,104 @@ func (x Uint32x16) moveMasked(mask Mask32x16) Uint32x16
 // Asm: VMOVDQU64, CPU Feature: AVX512
 func (x Uint64x8) moveMasked(mask Mask64x8) Uint64x8
 
+/* tern */
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGD, CPU Feature: AVX512
+func (x Int32x4) tern(table uint8, y Int32x4, z Int32x4) Int32x4
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGD, CPU Feature: AVX512
+func (x Int32x8) tern(table uint8, y Int32x8, z Int32x8) Int32x8
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGD, CPU Feature: AVX512
+func (x Int32x16) tern(table uint8, y Int32x16, z Int32x16) Int32x16
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGQ, CPU Feature: AVX512
+func (x Int64x2) tern(table uint8, y Int64x2, z Int64x2) Int64x2
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGQ, CPU Feature: AVX512
+func (x Int64x4) tern(table uint8, y Int64x4, z Int64x4) Int64x4
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGQ, CPU Feature: AVX512
+func (x Int64x8) tern(table uint8, y Int64x8, z Int64x8) Int64x8
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGD, CPU Feature: AVX512
+func (x Uint32x4) tern(table uint8, y Uint32x4, z Uint32x4) Uint32x4
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGD, CPU Feature: AVX512
+func (x Uint32x8) tern(table uint8, y Uint32x8, z Uint32x8) Uint32x8
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGD, CPU Feature: AVX512
+func (x Uint32x16) tern(table uint8, y Uint32x16, z Uint32x16) Uint32x16
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGQ, CPU Feature: AVX512
+func (x Uint64x2) tern(table uint8, y Uint64x2, z Uint64x2) Uint64x2
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGQ, CPU Feature: AVX512
+func (x Uint64x4) tern(table uint8, y Uint64x4, z Uint64x4) Uint64x4
+
+// tern performs a logical operation on three vectors based on the 8-bit truth table.
+// Bitwise, the result is equal to 1 & (table >> (x<<2 + y<<1 + z))
+//
+// table results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPTERNLOGQ, CPU Feature: AVX512
+func (x Uint64x8) tern(table uint8, y Uint64x8, z Uint64x8) Uint64x8
+
 // Float64x2 converts from Float32x4 to Float64x2
 func (from Float32x4) AsFloat64x2() (to Float64x2)
 
index 632e24d9d9a9ee83024e9d70b7fc57cf5a9067c9..c5b46eb0d96cc1f41e84426b1b9c2132b09a699f 100644 (file)
@@ -47,6 +47,31 @@ func TestConcatSelectedConstantGrouped32(t *testing.T) {
        test_helpers.CheckSlices[uint32](t, a, []uint32{2, 0, 5, 7, 10, 8, 13, 15})
 }
 
+func TestTern(t *testing.T) {
+       if !HasAVX512() {
+               t.Skip("This test needs AVX512")
+       }
+       x := LoadInt32x8Slice([]int32{0, 0, 0, 0, 1, 1, 1, 1})
+       y := LoadInt32x8Slice([]int32{0, 0, 1, 1, 0, 0, 1, 1})
+       z := LoadInt32x8Slice([]int32{0, 1, 0, 1, 0, 1, 0, 1})
+
+       foo := func(w Int32x8, k uint8) {
+               a := make([]int32, 8)
+               w.StoreSlice(a)
+               t.Logf("For k=%0b, w=%v", k, a)
+               for i, b := range a {
+                       if (int32(k)>>i)&1 != b {
+                               t.Errorf("Element %d of stored slice (=%d) did not match corresponding bit in 0b%b",
+                                       i, b, k)
+                       }
+               }
+       }
+
+       foo(x.tern(0b1111_0000, y, z), 0b1111_0000)
+       foo(x.tern(0b1100_1100, y, z), 0b1100_1100)
+       foo(x.tern(0b1010_1010, y, z), 0b1010_1010)
+}
+
 func TestSelect2x4x32(t *testing.T) {
        for a := range uint8(8) {
                for b := range uint8(8) {