From 3f92aa1ecae1f935731cffefcfe3a400e284ab82 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 1 Aug 2025 19:13:13 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd: make bitwise logic ops available to all u?int vectors This CL is generated by CL 692555. Change-Id: I24e6de83e0408576f385a1c8e861b08c583f9098 Reviewed-on: https://go-review.googlesource.com/c/go/+/692356 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- .../compile/internal/ssa/_gen/simdAMD64.rules | 16 +++ .../internal/ssa/_gen/simdgenericOps.go | 16 +++ src/cmd/compile/internal/ssa/opGen.go | 108 ++++++++++++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 48 ++++++++ .../compile/internal/ssagen/simdintrinsics.go | 16 +++ src/simd/binary_test.go | 8 +- src/simd/ops_amd64.go | 80 +++++++++++++ 7 files changed, 288 insertions(+), 4 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 7b7cbb9dc7..1d54cfcdbd 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -96,8 +96,10 @@ (AddSubFloat64x4 ...) => (VADDSUBPD256 ...) (AndInt8x16 ...) => (VPAND128 ...) (AndInt8x32 ...) => (VPAND256 ...) +(AndInt8x64 ...) => (VPANDD512 ...) (AndInt16x8 ...) => (VPAND128 ...) (AndInt16x16 ...) => (VPAND256 ...) +(AndInt16x32 ...) => (VPANDD512 ...) (AndInt32x4 ...) => (VPAND128 ...) (AndInt32x8 ...) => (VPAND256 ...) (AndInt32x16 ...) => (VPANDD512 ...) @@ -106,8 +108,10 @@ (AndInt64x8 ...) => (VPANDQ512 ...) (AndUint8x16 ...) => (VPAND128 ...) (AndUint8x32 ...) => (VPAND256 ...) +(AndUint8x64 ...) => (VPANDD512 ...) (AndUint16x8 ...) => (VPAND128 ...) (AndUint16x16 ...) => (VPAND256 ...) +(AndUint16x32 ...) => (VPANDD512 ...) (AndUint32x4 ...) => (VPAND128 ...) (AndUint32x8 ...) => (VPAND256 ...) (AndUint32x16 ...) => (VPANDD512 ...) @@ -128,8 +132,10 @@ (AndMaskedUint64x8 x y mask) => (VPANDQMasked512 x y (VPMOVVec64x8ToM mask)) (AndNotInt8x16 ...) => (VPANDN128 ...) (AndNotInt8x32 ...) => (VPANDN256 ...) +(AndNotInt8x64 ...) => (VPANDND512 ...) (AndNotInt16x8 ...) => (VPANDN128 ...) (AndNotInt16x16 ...) => (VPANDN256 ...) +(AndNotInt16x32 ...) => (VPANDND512 ...) (AndNotInt32x4 ...) => (VPANDN128 ...) (AndNotInt32x8 ...) => (VPANDN256 ...) (AndNotInt32x16 ...) => (VPANDND512 ...) @@ -138,8 +144,10 @@ (AndNotInt64x8 ...) => (VPANDNQ512 ...) (AndNotUint8x16 ...) => (VPANDN128 ...) (AndNotUint8x32 ...) => (VPANDN256 ...) +(AndNotUint8x64 ...) => (VPANDND512 ...) (AndNotUint16x8 ...) => (VPANDN128 ...) (AndNotUint16x16 ...) => (VPANDN256 ...) +(AndNotUint16x32 ...) => (VPANDND512 ...) (AndNotUint32x4 ...) => (VPANDN128 ...) (AndNotUint32x8 ...) => (VPANDN256 ...) (AndNotUint32x16 ...) => (VPANDND512 ...) @@ -967,8 +975,10 @@ (NotEqualMaskedUint64x8 x y mask) => (VPMOVMToVec64x8 (VPCMPUQMasked512 [4] x y (VPMOVVec64x8ToM mask))) (OrInt8x16 ...) => (VPOR128 ...) (OrInt8x32 ...) => (VPOR256 ...) +(OrInt8x64 ...) => (VPORD512 ...) (OrInt16x8 ...) => (VPOR128 ...) (OrInt16x16 ...) => (VPOR256 ...) +(OrInt16x32 ...) => (VPORD512 ...) (OrInt32x4 ...) => (VPOR128 ...) (OrInt32x8 ...) => (VPOR256 ...) (OrInt32x16 ...) => (VPORD512 ...) @@ -977,8 +987,10 @@ (OrInt64x8 ...) => (VPORQ512 ...) (OrUint8x16 ...) => (VPOR128 ...) (OrUint8x32 ...) => (VPOR256 ...) +(OrUint8x64 ...) => (VPORD512 ...) (OrUint16x8 ...) => (VPOR128 ...) (OrUint16x16 ...) => (VPOR256 ...) +(OrUint16x32 ...) => (VPORD512 ...) (OrUint32x4 ...) => (VPOR128 ...) (OrUint32x8 ...) => (VPOR256 ...) (OrUint32x16 ...) 
=> (VPORD512 ...) @@ -1773,8 +1785,10 @@ (UnsignedSignedQuadDotProdAccumulateMaskedInt32x16 x y z mask) => (VPDPBUSDMasked512 x y z (VPMOVVec32x16ToM mask)) (XorInt8x16 ...) => (VPXOR128 ...) (XorInt8x32 ...) => (VPXOR256 ...) +(XorInt8x64 ...) => (VPXORD512 ...) (XorInt16x8 ...) => (VPXOR128 ...) (XorInt16x16 ...) => (VPXOR256 ...) +(XorInt16x32 ...) => (VPXORD512 ...) (XorInt32x4 ...) => (VPXOR128 ...) (XorInt32x8 ...) => (VPXOR256 ...) (XorInt32x16 ...) => (VPXORD512 ...) @@ -1783,8 +1797,10 @@ (XorInt64x8 ...) => (VPXORQ512 ...) (XorUint8x16 ...) => (VPXOR128 ...) (XorUint8x32 ...) => (VPXOR256 ...) +(XorUint8x64 ...) => (VPXORD512 ...) (XorUint16x8 ...) => (VPXOR128 ...) (XorUint16x16 ...) => (VPXOR256 ...) +(XorUint16x32 ...) => (VPXORD512 ...) (XorUint32x4 ...) => (VPXOR128 ...) (XorUint32x8 ...) => (VPXOR256 ...) (XorUint32x16 ...) => (VPXORD512 ...) diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 6853c3b091..492a994e93 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -99,8 +99,10 @@ func simdGenericOps() []opData { {name: "AddUint64x8", argLength: 2, commutative: true}, {name: "AndInt8x16", argLength: 2, commutative: true}, {name: "AndInt8x32", argLength: 2, commutative: true}, + {name: "AndInt8x64", argLength: 2, commutative: true}, {name: "AndInt16x8", argLength: 2, commutative: true}, {name: "AndInt16x16", argLength: 2, commutative: true}, + {name: "AndInt16x32", argLength: 2, commutative: true}, {name: "AndInt32x4", argLength: 2, commutative: true}, {name: "AndInt32x8", argLength: 2, commutative: true}, {name: "AndInt32x16", argLength: 2, commutative: true}, @@ -121,8 +123,10 @@ func simdGenericOps() []opData { {name: "AndMaskedUint64x8", argLength: 3, commutative: true}, {name: "AndNotInt8x16", argLength: 2, commutative: false}, {name: "AndNotInt8x32", argLength: 2, commutative: false}, + {name: "AndNotInt8x64", argLength: 2, commutative: false}, {name: "AndNotInt16x8", argLength: 2, commutative: false}, {name: "AndNotInt16x16", argLength: 2, commutative: false}, + {name: "AndNotInt16x32", argLength: 2, commutative: false}, {name: "AndNotInt32x4", argLength: 2, commutative: false}, {name: "AndNotInt32x8", argLength: 2, commutative: false}, {name: "AndNotInt32x16", argLength: 2, commutative: false}, @@ -143,8 +147,10 @@ func simdGenericOps() []opData { {name: "AndNotMaskedUint64x8", argLength: 3, commutative: false}, {name: "AndNotUint8x16", argLength: 2, commutative: false}, {name: "AndNotUint8x32", argLength: 2, commutative: false}, + {name: "AndNotUint8x64", argLength: 2, commutative: false}, {name: "AndNotUint16x8", argLength: 2, commutative: false}, {name: "AndNotUint16x16", argLength: 2, commutative: false}, + {name: "AndNotUint16x32", argLength: 2, commutative: false}, {name: "AndNotUint32x4", argLength: 2, commutative: false}, {name: "AndNotUint32x8", argLength: 2, commutative: false}, {name: "AndNotUint32x16", argLength: 2, commutative: false}, @@ -153,8 +159,10 @@ func simdGenericOps() []opData { {name: "AndNotUint64x8", argLength: 2, commutative: false}, {name: "AndUint8x16", argLength: 2, commutative: true}, {name: "AndUint8x32", argLength: 2, commutative: true}, + {name: "AndUint8x64", argLength: 2, commutative: true}, {name: "AndUint16x8", argLength: 2, commutative: true}, {name: "AndUint16x16", argLength: 2, commutative: true}, + {name: "AndUint16x32", argLength: 2, commutative: true}, {name: "AndUint32x4", 
argLength: 2, commutative: true}, {name: "AndUint32x8", argLength: 2, commutative: true}, {name: "AndUint32x16", argLength: 2, commutative: true}, @@ -868,8 +876,10 @@ func simdGenericOps() []opData { {name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "OrInt8x16", argLength: 2, commutative: true}, {name: "OrInt8x32", argLength: 2, commutative: true}, + {name: "OrInt8x64", argLength: 2, commutative: true}, {name: "OrInt16x8", argLength: 2, commutative: true}, {name: "OrInt16x16", argLength: 2, commutative: true}, + {name: "OrInt16x32", argLength: 2, commutative: true}, {name: "OrInt32x4", argLength: 2, commutative: true}, {name: "OrInt32x8", argLength: 2, commutative: true}, {name: "OrInt32x16", argLength: 2, commutative: true}, @@ -890,8 +900,10 @@ func simdGenericOps() []opData { {name: "OrMaskedUint64x8", argLength: 3, commutative: true}, {name: "OrUint8x16", argLength: 2, commutative: true}, {name: "OrUint8x32", argLength: 2, commutative: true}, + {name: "OrUint8x64", argLength: 2, commutative: true}, {name: "OrUint16x8", argLength: 2, commutative: true}, {name: "OrUint16x16", argLength: 2, commutative: true}, + {name: "OrUint16x32", argLength: 2, commutative: true}, {name: "OrUint32x4", argLength: 2, commutative: true}, {name: "OrUint32x8", argLength: 2, commutative: true}, {name: "OrUint32x16", argLength: 2, commutative: true}, @@ -1512,8 +1524,10 @@ func simdGenericOps() []opData { {name: "UnsignedSignedQuadDotProdAccumulateMaskedInt32x16", argLength: 4, commutative: false}, {name: "XorInt8x16", argLength: 2, commutative: true}, {name: "XorInt8x32", argLength: 2, commutative: true}, + {name: "XorInt8x64", argLength: 2, commutative: true}, {name: "XorInt16x8", argLength: 2, commutative: true}, {name: "XorInt16x16", argLength: 2, commutative: true}, + {name: "XorInt16x32", argLength: 2, commutative: true}, {name: "XorInt32x4", argLength: 2, commutative: true}, {name: "XorInt32x8", argLength: 2, commutative: true}, {name: "XorInt32x16", argLength: 2, commutative: true}, @@ -1534,8 +1548,10 @@ func simdGenericOps() []opData { {name: "XorMaskedUint64x8", argLength: 3, commutative: true}, {name: "XorUint8x16", argLength: 2, commutative: true}, {name: "XorUint8x32", argLength: 2, commutative: true}, + {name: "XorUint8x64", argLength: 2, commutative: true}, {name: "XorUint16x8", argLength: 2, commutative: true}, {name: "XorUint16x16", argLength: 2, commutative: true}, + {name: "XorUint16x32", argLength: 2, commutative: true}, {name: "XorUint32x4", argLength: 2, commutative: true}, {name: "XorUint32x8", argLength: 2, commutative: true}, {name: "XorUint32x16", argLength: 2, commutative: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 7427137b22..e8a5354c00 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -4585,8 +4585,10 @@ const ( OpAddUint64x8 OpAndInt8x16 OpAndInt8x32 + OpAndInt8x64 OpAndInt16x8 OpAndInt16x16 + OpAndInt16x32 OpAndInt32x4 OpAndInt32x8 OpAndInt32x16 @@ -4607,8 +4609,10 @@ const ( OpAndMaskedUint64x8 OpAndNotInt8x16 OpAndNotInt8x32 + OpAndNotInt8x64 OpAndNotInt16x8 OpAndNotInt16x16 + OpAndNotInt16x32 OpAndNotInt32x4 OpAndNotInt32x8 OpAndNotInt32x16 @@ -4629,8 +4633,10 @@ const ( OpAndNotMaskedUint64x8 OpAndNotUint8x16 OpAndNotUint8x32 + OpAndNotUint8x64 OpAndNotUint16x8 OpAndNotUint16x16 + OpAndNotUint16x32 OpAndNotUint32x4 OpAndNotUint32x8 OpAndNotUint32x16 @@ -4639,8 +4645,10 @@ const ( OpAndNotUint64x8 OpAndUint8x16 OpAndUint8x32 + OpAndUint8x64 
OpAndUint16x8 OpAndUint16x16 + OpAndUint16x32 OpAndUint32x4 OpAndUint32x8 OpAndUint32x16 @@ -5354,8 +5362,10 @@ const ( OpNotEqualUint64x8 OpOrInt8x16 OpOrInt8x32 + OpOrInt8x64 OpOrInt16x8 OpOrInt16x16 + OpOrInt16x32 OpOrInt32x4 OpOrInt32x8 OpOrInt32x16 @@ -5376,8 +5386,10 @@ const ( OpOrMaskedUint64x8 OpOrUint8x16 OpOrUint8x32 + OpOrUint8x64 OpOrUint16x8 OpOrUint16x16 + OpOrUint16x32 OpOrUint32x4 OpOrUint32x8 OpOrUint32x16 @@ -5998,8 +6010,10 @@ const ( OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16 OpXorInt8x16 OpXorInt8x32 + OpXorInt8x64 OpXorInt16x8 OpXorInt16x16 + OpXorInt16x32 OpXorInt32x4 OpXorInt32x8 OpXorInt32x16 @@ -6020,8 +6034,10 @@ const ( OpXorMaskedUint64x8 OpXorUint8x16 OpXorUint8x32 + OpXorUint8x64 OpXorUint16x8 OpXorUint16x16 + OpXorUint16x32 OpXorUint32x4 OpXorUint32x8 OpXorUint32x16 @@ -62211,6 +62227,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AndInt8x64", + argLen: 2, + commutative: true, + generic: true, + }, { name: "AndInt16x8", argLen: 2, @@ -62223,6 +62245,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AndInt16x32", + argLen: 2, + commutative: true, + generic: true, + }, { name: "AndInt32x4", argLen: 2, @@ -62341,6 +62369,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "AndNotInt8x64", + argLen: 2, + generic: true, + }, { name: "AndNotInt16x8", argLen: 2, @@ -62351,6 +62384,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "AndNotInt16x32", + argLen: 2, + generic: true, + }, { name: "AndNotInt32x4", argLen: 2, @@ -62451,6 +62489,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "AndNotUint8x64", + argLen: 2, + generic: true, + }, { name: "AndNotUint16x8", argLen: 2, @@ -62461,6 +62504,11 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "AndNotUint16x32", + argLen: 2, + generic: true, + }, { name: "AndNotUint32x4", argLen: 2, @@ -62503,6 +62551,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AndUint8x64", + argLen: 2, + commutative: true, + generic: true, + }, { name: "AndUint16x8", argLen: 2, @@ -62515,6 +62569,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "AndUint16x32", + argLen: 2, + commutative: true, + generic: true, + }, { name: "AndUint32x4", argLen: 2, @@ -66413,6 +66473,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "OrInt8x64", + argLen: 2, + commutative: true, + generic: true, + }, { name: "OrInt16x8", argLen: 2, @@ -66425,6 +66491,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "OrInt16x32", + argLen: 2, + commutative: true, + generic: true, + }, { name: "OrInt32x4", argLen: 2, @@ -66545,6 +66617,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "OrUint8x64", + argLen: 2, + commutative: true, + generic: true, + }, { name: "OrUint16x8", argLen: 2, @@ -66557,6 +66635,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "OrUint16x32", + argLen: 2, + commutative: true, + generic: true, + }, { name: "OrUint32x4", argLen: 2, @@ -69689,6 +69773,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "XorInt8x64", + argLen: 2, + commutative: true, + generic: true, + }, { name: "XorInt16x8", argLen: 2, @@ -69701,6 +69791,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: 
"XorInt16x32", + argLen: 2, + commutative: true, + generic: true, + }, { name: "XorInt32x4", argLen: 2, @@ -69821,6 +69917,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "XorUint8x64", + argLen: 2, + commutative: true, + generic: true, + }, { name: "XorUint16x8", argLen: 2, @@ -69833,6 +69935,12 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, + { + name: "XorUint16x32", + argLen: 2, + commutative: true, + generic: true, + }, { name: "XorUint32x4", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 5abb50ab71..82f13b43c6 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -831,6 +831,9 @@ func rewriteValueAMD64(v *Value) bool { case OpAndInt16x16: v.Op = OpAMD64VPAND256 return true + case OpAndInt16x32: + v.Op = OpAMD64VPANDD512 + return true case OpAndInt16x8: v.Op = OpAMD64VPAND128 return true @@ -858,6 +861,9 @@ func rewriteValueAMD64(v *Value) bool { case OpAndInt8x32: v.Op = OpAMD64VPAND256 return true + case OpAndInt8x64: + v.Op = OpAMD64VPANDD512 + return true case OpAndMaskedInt32x16: return rewriteValueAMD64_OpAndMaskedInt32x16(v) case OpAndMaskedInt32x4: @@ -885,6 +891,9 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotInt16x16: v.Op = OpAMD64VPANDN256 return true + case OpAndNotInt16x32: + v.Op = OpAMD64VPANDND512 + return true case OpAndNotInt16x8: v.Op = OpAMD64VPANDN128 return true @@ -912,6 +921,9 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotInt8x32: v.Op = OpAMD64VPANDN256 return true + case OpAndNotInt8x64: + v.Op = OpAMD64VPANDND512 + return true case OpAndNotMaskedInt32x16: return rewriteValueAMD64_OpAndNotMaskedInt32x16(v) case OpAndNotMaskedInt32x4: @@ -939,6 +951,9 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotUint16x16: v.Op = OpAMD64VPANDN256 return true + case OpAndNotUint16x32: + v.Op = OpAMD64VPANDND512 + return true case OpAndNotUint16x8: v.Op = OpAMD64VPANDN128 return true @@ -966,9 +981,15 @@ func rewriteValueAMD64(v *Value) bool { case OpAndNotUint8x32: v.Op = OpAMD64VPANDN256 return true + case OpAndNotUint8x64: + v.Op = OpAMD64VPANDND512 + return true case OpAndUint16x16: v.Op = OpAMD64VPAND256 return true + case OpAndUint16x32: + v.Op = OpAMD64VPANDD512 + return true case OpAndUint16x8: v.Op = OpAMD64VPAND128 return true @@ -996,6 +1017,9 @@ func rewriteValueAMD64(v *Value) bool { case OpAndUint8x32: v.Op = OpAMD64VPAND256 return true + case OpAndUint8x64: + v.Op = OpAMD64VPANDD512 + return true case OpApproximateReciprocalFloat32x16: v.Op = OpAMD64VRCP14PS512 return true @@ -3274,6 +3298,9 @@ func rewriteValueAMD64(v *Value) bool { case OpOrInt16x16: v.Op = OpAMD64VPOR256 return true + case OpOrInt16x32: + v.Op = OpAMD64VPORD512 + return true case OpOrInt16x8: v.Op = OpAMD64VPOR128 return true @@ -3301,6 +3328,9 @@ func rewriteValueAMD64(v *Value) bool { case OpOrInt8x32: v.Op = OpAMD64VPOR256 return true + case OpOrInt8x64: + v.Op = OpAMD64VPORD512 + return true case OpOrMaskedInt32x16: return rewriteValueAMD64_OpOrMaskedInt32x16(v) case OpOrMaskedInt32x4: @@ -3328,6 +3358,9 @@ func rewriteValueAMD64(v *Value) bool { case OpOrUint16x16: v.Op = OpAMD64VPOR256 return true + case OpOrUint16x32: + v.Op = OpAMD64VPORD512 + return true case OpOrUint16x8: v.Op = OpAMD64VPOR128 return true @@ -3355,6 +3388,9 @@ func rewriteValueAMD64(v *Value) bool { case OpOrUint8x32: v.Op = OpAMD64VPOR256 return true + case OpOrUint8x64: + v.Op = 
OpAMD64VPORD512 + return true case OpPairDotProdInt16x16: v.Op = OpAMD64VPMADDWD256 return true @@ -5537,6 +5573,9 @@ func rewriteValueAMD64(v *Value) bool { case OpXorInt16x16: v.Op = OpAMD64VPXOR256 return true + case OpXorInt16x32: + v.Op = OpAMD64VPXORD512 + return true case OpXorInt16x8: v.Op = OpAMD64VPXOR128 return true @@ -5564,6 +5603,9 @@ func rewriteValueAMD64(v *Value) bool { case OpXorInt8x32: v.Op = OpAMD64VPXOR256 return true + case OpXorInt8x64: + v.Op = OpAMD64VPXORD512 + return true case OpXorMaskedInt32x16: return rewriteValueAMD64_OpXorMaskedInt32x16(v) case OpXorMaskedInt32x4: @@ -5591,6 +5633,9 @@ func rewriteValueAMD64(v *Value) bool { case OpXorUint16x16: v.Op = OpAMD64VPXOR256 return true + case OpXorUint16x32: + v.Op = OpAMD64VPXORD512 + return true case OpXorUint16x8: v.Op = OpAMD64VPXOR128 return true @@ -5618,6 +5663,9 @@ func rewriteValueAMD64(v *Value) bool { case OpXorUint8x32: v.Op = OpAMD64VPXOR256 return true + case OpXorUint8x64: + v.Op = OpAMD64VPXORD512 + return true case OpZero: return rewriteValueAMD64_OpZero(v) case OpZeroExt16to32: diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 12c388ca91..7a7367ee1e 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -107,8 +107,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x4.AddSub", opLen2(ssa.OpAddSubFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.And", opLen2(ssa.OpAndInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.And", opLen2(ssa.OpAndInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.And", opLen2(ssa.OpAndInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.And", opLen2(ssa.OpAndInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.And", opLen2(ssa.OpAndInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.And", opLen2(ssa.OpAndInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64) @@ -117,8 +119,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int64x8.And", opLen2(ssa.OpAndInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.And", opLen2(ssa.OpAndUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.And", opLen2(ssa.OpAndUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.And", opLen2(ssa.OpAndUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.And", opLen2(ssa.OpAndUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.And", opLen2(ssa.OpAndUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.And", opLen2(ssa.OpAndUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.And", opLen2(ssa.OpAndUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.And", opLen2(ssa.OpAndUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.And", opLen2(ssa.OpAndUint32x16, types.TypeVec512), sys.AMD64) @@ -139,8 +143,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint64x8.AndMasked", opLen3(ssa.OpAndMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.AndNot", opLen2_21(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.AndNot", opLen2_21(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.AndNot", opLen2_21(ssa.OpAndNotInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.AndNot", opLen2_21(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.AndNot", opLen2_21(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.AndNot", opLen2_21(ssa.OpAndNotInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.AndNot", opLen2_21(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.AndNot", opLen2_21(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.AndNot", opLen2_21(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64) @@ -149,8 +155,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int64x8.AndNot", opLen2_21(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.AndNot", opLen2_21(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.AndNot", opLen2_21(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AndNot", opLen2_21(ssa.OpAndNotUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.AndNot", opLen2_21(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.AndNot", opLen2_21(ssa.OpAndNotUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.AndNot", opLen2_21(ssa.OpAndNotUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.AndNot", opLen2_21(ssa.OpAndNotUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.AndNot", opLen2_21(ssa.OpAndNotUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.AndNot", opLen2_21(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64) @@ -978,8 +986,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Uint64x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.Or", opLen2(ssa.OpOrInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.Or", opLen2(ssa.OpOrInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.Or", opLen2(ssa.OpOrInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64) @@ -988,8 +998,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int64x8.Or", opLen2(ssa.OpOrInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.Or", opLen2(ssa.OpOrUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.Or", opLen2(ssa.OpOrUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.Or", opLen2(ssa.OpOrUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.Or", opLen2(ssa.OpOrUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.Or", opLen2(ssa.OpOrUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.Or", opLen2(ssa.OpOrUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.Or", opLen2(ssa.OpOrUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.Or", opLen2(ssa.OpOrUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Or", opLen2(ssa.OpOrUint32x16, types.TypeVec512), sys.AMD64) @@ -1784,8 +1796,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int8x64.UnsignedSignedQuadDotProdAccumulateMasked", opLen4_31(ssa.OpUnsignedSignedQuadDotProdAccumulateMaskedInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.Xor", opLen2(ssa.OpXorInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.Xor", opLen2(ssa.OpXorInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.Xor", opLen2(ssa.OpXorInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x4.Xor", opLen2(ssa.OpXorInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.Xor", opLen2(ssa.OpXorInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.Xor", opLen2(ssa.OpXorInt32x16, types.TypeVec512), sys.AMD64) @@ -1794,8 +1808,10 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Int64x8.Xor", opLen2(ssa.OpXorInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.Xor", opLen2(ssa.OpXorUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.Xor", opLen2(ssa.OpXorUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.Xor", opLen2(ssa.OpXorUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint16x8.Xor", opLen2(ssa.OpXorUint16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint16x16.Xor", opLen2(ssa.OpXorUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.Xor", opLen2(ssa.OpXorUint16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.Xor", opLen2(ssa.OpXorUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x8.Xor", opLen2(ssa.OpXorUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Xor", opLen2(ssa.OpXorUint32x16, types.TypeVec512), sys.AMD64) diff --git a/src/simd/binary_test.go b/src/simd/binary_test.go index 4221e74144..b7daf736f4 100644 --- a/src/simd/binary_test.go +++ b/src/simd/binary_test.go @@ -230,12 +230,12 @@ func TestAndNot(t *testing.T) { testUint8x32Binary(t, simd.Uint8x32.AndNot, andNotSlice[uint8]) if simd.HasAVX512() { - // testInt8x64Binary(t, simd.Int8x64.AndNot, andNotSlice[int8]) // missing - // testInt16x32Binary(t, simd.Int16x32.AndNot, andNotSlice[int16]) // missing + testInt8x64Binary(t, simd.Int8x64.AndNot, andNotSlice[int8]) + testInt16x32Binary(t, simd.Int16x32.AndNot, andNotSlice[int16]) testInt32x16Binary(t, simd.Int32x16.AndNot, andNotSlice[int32]) testInt64x8Binary(t, simd.Int64x8.AndNot, andNotSlice[int64]) - // testUint8x64Binary(t, simd.Uint8x64.AndNot, andNotSlice[uint8]) // missing - // testUint16x32Binary(t, simd.Uint16x32.AndNot, andNotSlice[uint16]) // missing + testUint8x64Binary(t, simd.Uint8x64.AndNot, andNotSlice[uint8]) + testUint16x32Binary(t, simd.Uint16x32.AndNot, andNotSlice[uint16]) testUint32x16Binary(t, simd.Uint32x16.AndNot, andNotSlice[uint32]) testUint64x8Binary(t, simd.Uint64x8.AndNot, andNotSlice[uint64]) } diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index ea0c598157..5776350fe9 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -590,6 +590,11 @@ func (x Int8x16) And(y Int8x16) Int8x16 // Asm: VPAND, CPU Feature: AVX2 func (x Int8x32) And(y Int8x32) Int8x32 +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512F +func (x Int8x64) And(y Int8x64) Int8x64 + // And performs a bitwise AND operation between two vectors. // // Asm: VPAND, CPU Feature: AVX @@ -600,6 +605,11 @@ func (x Int16x8) And(y Int16x8) Int16x8 // Asm: VPAND, CPU Feature: AVX2 func (x Int16x16) And(y Int16x16) Int16x16 +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512F +func (x Int16x32) And(y Int16x32) Int16x32 + // And performs a bitwise AND operation between two vectors. // // Asm: VPAND, CPU Feature: AVX @@ -640,6 +650,11 @@ func (x Uint8x16) And(y Uint8x16) Uint8x16 // Asm: VPAND, CPU Feature: AVX2 func (x Uint8x32) And(y Uint8x32) Uint8x32 +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512F +func (x Uint8x64) And(y Uint8x64) Uint8x64 + // And performs a bitwise AND operation between two vectors. // // Asm: VPAND, CPU Feature: AVX @@ -650,6 +665,11 @@ func (x Uint16x8) And(y Uint16x8) Uint16x8 // Asm: VPAND, CPU Feature: AVX2 func (x Uint16x16) And(y Uint16x16) Uint16x16 +// And performs a bitwise AND operation between two vectors. 
+// +// Asm: VPANDD, CPU Feature: AVX512F +func (x Uint16x32) And(y Uint16x32) Uint16x32 + // And performs a bitwise AND operation between two vectors. // // Asm: VPAND, CPU Feature: AVX @@ -778,6 +798,11 @@ func (x Int8x16) AndNot(y Int8x16) Int8x16 // Asm: VPANDN, CPU Feature: AVX2 func (x Int8x32) AndNot(y Int8x32) Int8x32 +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512F +func (x Int8x64) AndNot(y Int8x64) Int8x64 + // AndNot performs a bitwise x &^ y. // // Asm: VPANDN, CPU Feature: AVX @@ -788,6 +813,11 @@ func (x Int16x8) AndNot(y Int16x8) Int16x8 // Asm: VPANDN, CPU Feature: AVX2 func (x Int16x16) AndNot(y Int16x16) Int16x16 +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512F +func (x Int16x32) AndNot(y Int16x32) Int16x32 + // AndNot performs a bitwise x &^ y. // // Asm: VPANDN, CPU Feature: AVX @@ -828,6 +858,11 @@ func (x Uint8x16) AndNot(y Uint8x16) Uint8x16 // Asm: VPANDN, CPU Feature: AVX2 func (x Uint8x32) AndNot(y Uint8x32) Uint8x32 +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512F +func (x Uint8x64) AndNot(y Uint8x64) Uint8x64 + // AndNot performs a bitwise x &^ y. // // Asm: VPANDN, CPU Feature: AVX @@ -838,6 +873,11 @@ func (x Uint16x8) AndNot(y Uint16x8) Uint16x8 // Asm: VPANDN, CPU Feature: AVX2 func (x Uint16x16) AndNot(y Uint16x16) Uint16x16 +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512F +func (x Uint16x32) AndNot(y Uint16x32) Uint16x32 + // AndNot performs a bitwise x &^ y. // // Asm: VPANDN, CPU Feature: AVX @@ -6183,6 +6223,11 @@ func (x Int8x16) Or(y Int8x16) Int8x16 // Asm: VPOR, CPU Feature: AVX2 func (x Int8x32) Or(y Int8x32) Int8x32 +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512F +func (x Int8x64) Or(y Int8x64) Int8x64 + // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX @@ -6193,6 +6238,11 @@ func (x Int16x8) Or(y Int16x8) Int16x8 // Asm: VPOR, CPU Feature: AVX2 func (x Int16x16) Or(y Int16x16) Int16x16 +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512F +func (x Int16x32) Or(y Int16x32) Int16x32 + // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX @@ -6233,6 +6283,11 @@ func (x Uint8x16) Or(y Uint8x16) Uint8x16 // Asm: VPOR, CPU Feature: AVX2 func (x Uint8x32) Or(y Uint8x32) Uint8x32 +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512F +func (x Uint8x64) Or(y Uint8x64) Uint8x64 + // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX @@ -6243,6 +6298,11 @@ func (x Uint16x8) Or(y Uint16x8) Uint16x8 // Asm: VPOR, CPU Feature: AVX2 func (x Uint16x16) Or(y Uint16x16) Uint16x16 +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512F +func (x Uint16x32) Or(y Uint16x32) Uint16x32 + // Or performs a bitwise OR operation between two vectors. // // Asm: VPOR, CPU Feature: AVX @@ -11867,6 +11927,11 @@ func (x Int8x16) Xor(y Int8x16) Int8x16 // Asm: VPXOR, CPU Feature: AVX2 func (x Int8x32) Xor(y Int8x32) Int8x32 +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512F +func (x Int8x64) Xor(y Int8x64) Int8x64 + // Xor performs a bitwise XOR operation between two vectors. 
// // Asm: VPXOR, CPU Feature: AVX @@ -11877,6 +11942,11 @@ func (x Int16x8) Xor(y Int16x8) Int16x8 // Asm: VPXOR, CPU Feature: AVX2 func (x Int16x16) Xor(y Int16x16) Int16x16 +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512F +func (x Int16x32) Xor(y Int16x32) Int16x32 + // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX @@ -11917,6 +11987,11 @@ func (x Uint8x16) Xor(y Uint8x16) Uint8x16 // Asm: VPXOR, CPU Feature: AVX2 func (x Uint8x32) Xor(y Uint8x32) Uint8x32 +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512F +func (x Uint8x64) Xor(y Uint8x64) Uint8x64 + // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX @@ -11927,6 +12002,11 @@ func (x Uint16x8) Xor(y Uint16x8) Uint16x8 // Asm: VPXOR, CPU Feature: AVX2 func (x Uint16x16) Xor(y Uint16x16) Uint16x16 +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512F +func (x Uint16x32) Xor(y Uint16x32) Uint16x32 + // Xor performs a bitwise XOR operation between two vectors. // // Asm: VPXOR, CPU Feature: AVX -- 2.52.0
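For reviewers trying the change out, here is a minimal usage sketch of the newly exposed 512-bit bitwise methods. It is not part of the CL: it assumes a dev.simd toolchain with the simd package enabled (GOEXPERIMENT=simd), and the slice load/store helpers it uses (LoadUint8x64Slice, StoreSlice) are assumed names following the package's existing Load*Slice pattern rather than something this CL adds. The And/AndNot/Or/Xor methods and simd.HasAVX512 are taken directly from the diff above.

    package main

    import (
        "fmt"
        "simd"
    )

    func main() {
        // The 512-bit vector types need AVX-512 at run time.
        if !simd.HasAVX512() {
            fmt.Println("AVX-512 not available; skipping 512-bit bitwise ops")
            return
        }

        a := make([]uint8, 64)
        b := make([]uint8, 64)
        for i := range a {
            a[i] = uint8(i)
            b[i] = 0x0F
        }

        // LoadUint8x64Slice/StoreSlice are assumed helper names from the
        // dev.simd package, not part of this CL.
        x := simd.LoadUint8x64Slice(a)
        y := simd.LoadUint8x64Slice(b)

        out := make([]uint8, 64)

        x.And(y).StoreSlice(out)    // lowers to VPANDD: out[i] = a[i] & b[i]
        fmt.Println("and:   ", out[:8])

        x.AndNot(y).StoreSlice(out) // lowers to VPANDND: out[i] = a[i] &^ b[i]
        fmt.Println("andnot:", out[:8])

        x.Or(y).StoreSlice(out)     // lowers to VPORD: out[i] = a[i] | b[i]
        fmt.Println("or:    ", out[:8])

        x.Xor(y).StoreSlice(out)    // lowers to VPXORD: out[i] = a[i] ^ b[i]
        fmt.Println("xor:   ", out[:8])
    }

The same method set now exists on Int8x64, Int16x32, and Uint16x32 as well, so callers no longer need to fall back to the 32- or 64-bit element 512-bit types just to get bitwise logic on byte and word vectors.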