// SIMD lowering rules
-// Mask loads
-(LoadMask8x16 <t> ptr mem) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask8x32 <t> ptr mem) => (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask8x64 <t> ptr mem) => (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask16x8 <t> ptr mem) => (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask16x16 <t> ptr mem) => (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask16x32 <t> ptr mem) => (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask32x4 <t> ptr mem) => (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask32x8 <t> ptr mem) => (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask32x16 <t> ptr mem) => (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask64x2 <t> ptr mem) => (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask64x4 <t> ptr mem) => (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask64x8 <t> ptr mem) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(StoreMask8x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
-(StoreMask8x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
-(StoreMask8x64 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
-
-(StoreMask16x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
-(StoreMask16x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
-(StoreMask16x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
-
-(StoreMask32x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
-(StoreMask32x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
-(StoreMask32x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
-
-(StoreMask64x2 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
-(StoreMask64x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
-(StoreMask64x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
-
-// TODO is this correct? Should we just do it all from 64-bits?
-
// Mask conversions
// integers to masks
(Cvt16toMask8x16 <t> x) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
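
For reference, the surviving conversion rule above lowers the same way the deleted LoadMask rules did: the rule generator turns it into a rewrite function in rewriteAMD64.go. A rough sketch of the function it would emit, mirroring the deleted LoadMask rewrites shown further down (illustrative only; the real code is produced by rulegen):

```go
// Sketch of the generated rewrite for the Cvt16toMask8x16 rule above.
func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool {
	v_0 := v.Args[0]
	b := v.Block
	// match: (Cvt16toMask8x16 <t> x)
	// result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
	for {
		t := v.Type
		x := v_0
		v.reset(OpAMD64VPMOVMToVec8x16)
		v.Type = types.TypeVec128
		v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
```
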
{name: "PrefetchCacheStreamed", argLength: 2, hasSideEffects: true}, // Do non-temporal or streamed prefetch arg0 to cache. arg0=addr, arg1=memory.
// SIMD
- {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
- {name: "LoadMask8x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask8x32", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask8x64", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x8", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x32", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x4", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x8", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x2", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x4", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x8", argLength: 2}, // arg0 = ptr, arg1 = mem
-
- {name: "StoreMask8x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask8x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask8x64", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x2", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
// Convert integers to masks
{name: "Cvt16toMask8x16", argLength: 1}, // arg0 = integer mask value
OpPrefetchCache
OpPrefetchCacheStreamed
OpZeroSIMD
- OpLoadMask8x16
- OpLoadMask8x32
- OpLoadMask8x64
- OpLoadMask16x8
- OpLoadMask16x16
- OpLoadMask16x32
- OpLoadMask32x4
- OpLoadMask32x8
- OpLoadMask32x16
- OpLoadMask64x2
- OpLoadMask64x4
- OpLoadMask64x8
- OpStoreMask8x16
- OpStoreMask8x32
- OpStoreMask8x64
- OpStoreMask16x8
- OpStoreMask16x16
- OpStoreMask16x32
- OpStoreMask32x4
- OpStoreMask32x8
- OpStoreMask32x16
- OpStoreMask64x2
- OpStoreMask64x4
- OpStoreMask64x8
OpCvt16toMask8x16
OpCvt32toMask8x32
OpCvt64toMask8x64
argLen: 0,
generic: true,
},
- {
- name: "LoadMask8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "StoreMask8x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask8x32",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask8x64",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x32",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x4",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x2",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x4",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
{
name: "Cvt16toMask8x16",
argLen: 1,
return rewriteValueAMD64_OpLessUint8x64(v)
case OpLoad:
return rewriteValueAMD64_OpLoad(v)
- case OpLoadMask16x16:
- return rewriteValueAMD64_OpLoadMask16x16(v)
- case OpLoadMask16x32:
- return rewriteValueAMD64_OpLoadMask16x32(v)
- case OpLoadMask16x8:
- return rewriteValueAMD64_OpLoadMask16x8(v)
- case OpLoadMask32x16:
- return rewriteValueAMD64_OpLoadMask32x16(v)
- case OpLoadMask32x4:
- return rewriteValueAMD64_OpLoadMask32x4(v)
- case OpLoadMask32x8:
- return rewriteValueAMD64_OpLoadMask32x8(v)
- case OpLoadMask64x2:
- return rewriteValueAMD64_OpLoadMask64x2(v)
- case OpLoadMask64x4:
- return rewriteValueAMD64_OpLoadMask64x4(v)
- case OpLoadMask64x8:
- return rewriteValueAMD64_OpLoadMask64x8(v)
- case OpLoadMask8x16:
- return rewriteValueAMD64_OpLoadMask8x16(v)
- case OpLoadMask8x32:
- return rewriteValueAMD64_OpLoadMask8x32(v)
- case OpLoadMask8x64:
- return rewriteValueAMD64_OpLoadMask8x64(v)
case OpLoadMasked16:
return rewriteValueAMD64_OpLoadMasked16(v)
case OpLoadMasked32:
return true
case OpStore:
return rewriteValueAMD64_OpStore(v)
- case OpStoreMask16x16:
- return rewriteValueAMD64_OpStoreMask16x16(v)
- case OpStoreMask16x32:
- return rewriteValueAMD64_OpStoreMask16x32(v)
- case OpStoreMask16x8:
- return rewriteValueAMD64_OpStoreMask16x8(v)
- case OpStoreMask32x16:
- return rewriteValueAMD64_OpStoreMask32x16(v)
- case OpStoreMask32x4:
- return rewriteValueAMD64_OpStoreMask32x4(v)
- case OpStoreMask32x8:
- return rewriteValueAMD64_OpStoreMask32x8(v)
- case OpStoreMask64x2:
- return rewriteValueAMD64_OpStoreMask64x2(v)
- case OpStoreMask64x4:
- return rewriteValueAMD64_OpStoreMask64x4(v)
- case OpStoreMask64x8:
- return rewriteValueAMD64_OpStoreMask64x8(v)
- case OpStoreMask8x16:
- return rewriteValueAMD64_OpStoreMask8x16(v)
- case OpStoreMask8x32:
- return rewriteValueAMD64_OpStoreMask8x32(v)
- case OpStoreMask8x64:
- return rewriteValueAMD64_OpStoreMask8x64(v)
case OpStoreMasked16:
return rewriteValueAMD64_OpStoreMasked16(v)
case OpStoreMasked32:
}
return false
}
-func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x16 <t> ptr mem)
- // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x16)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x32 <t> ptr mem)
- // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x32)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x8 <t> ptr mem)
- // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x8)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x16 <t> ptr mem)
- // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x16)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x4 <t> ptr mem)
- // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x4)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x8 <t> ptr mem)
- // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x8)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x2 <t> ptr mem)
- // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x2)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x4 <t> ptr mem)
- // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x4)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x8 <t> ptr mem)
- // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x8)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x16 <t> ptr mem)
- // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x16)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x32 <t> ptr mem)
- // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x32)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x64 <t> ptr mem)
- // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x64)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
}
return false
}
-func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x32 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x4 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x2 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x4 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x32 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x64 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
}
}
-var loadMaskOpcodes = map[int]map[int]ssa.Op{
- 8: {16: ssa.OpLoadMask8x16, 32: ssa.OpLoadMask8x32, 64: ssa.OpLoadMask8x64},
- 16: {8: ssa.OpLoadMask16x8, 16: ssa.OpLoadMask16x16, 32: ssa.OpLoadMask16x32},
- 32: {4: ssa.OpLoadMask32x4, 8: ssa.OpLoadMask32x8, 16: ssa.OpLoadMask32x16},
- 64: {2: ssa.OpLoadMask64x2, 4: ssa.OpLoadMask64x4, 8: ssa.OpLoadMask64x8},
-}
-
var cvtVToMaskOpcodes = map[int]map[int]ssa.Op{
8: {16: ssa.OpCvt16toMask8x16, 32: ssa.OpCvt32toMask8x32, 64: ssa.OpCvt64toMask8x64},
16: {8: ssa.OpCvt8toMask16x8, 16: ssa.OpCvt16toMask16x16, 32: ssa.OpCvt32toMask16x32},
64: {2: ssa.OpCvtMask64x2to8, 4: ssa.OpCvtMask64x4to8, 8: ssa.OpCvtMask64x8to8},
}
-func simdLoadMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- op := loadMaskOpcodes[elemBits][lanes]
- if op == 0 {
- panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
- }
- return s.newValue2(op, types.TypeMask, args[0], s.mem())
- }
-}
-
-func simdStoreMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- opCodes := map[int]map[int]ssa.Op{
- 8: {16: ssa.OpStoreMask8x16, 32: ssa.OpStoreMask8x32, 64: ssa.OpStoreMask8x64},
- 16: {8: ssa.OpStoreMask16x8, 16: ssa.OpStoreMask16x16, 32: ssa.OpStoreMask16x32},
- 32: {4: ssa.OpStoreMask32x4, 8: ssa.OpStoreMask32x8, 16: ssa.OpStoreMask32x16},
- 64: {2: ssa.OpStoreMask64x2, 4: ssa.OpStoreMask64x4, 8: ssa.OpStoreMask64x8},
- }
- op := opCodes[elemBits][lanes]
- if op == 0 {
- panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
- }
- s.vars[memVar] = s.newValue3A(op, types.TypeMem, types.TypeMask, args[1], args[0], s.mem())
- return nil
- }
-}
-
func simdCvtVToMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
op := cvtVToMaskOpcodes[elemBits][lanes]
addF(simdPackage, "Int8x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
- addF(simdPackage, "Mask8x16.StoreToBits", simdStoreMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x16FromBits", simdCvtVToMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x16.ToBits", simdCvtMaskToV(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
- addF(simdPackage, "Mask8x32.StoreToBits", simdStoreMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x32FromBits", simdCvtVToMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x32.ToBits", simdCvtMaskToV(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x64.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
- addF(simdPackage, "Mask8x64.StoreToBits", simdStoreMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask8x64FromBits", simdCvtVToMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask8x64.ToBits", simdCvtMaskToV(8, 64), sys.AMD64)
addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
- addF(simdPackage, "Mask16x8.StoreToBits", simdStoreMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x8FromBits", simdCvtVToMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x8.ToBits", simdCvtMaskToV(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
- addF(simdPackage, "Mask16x16.StoreToBits", simdStoreMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x16FromBits", simdCvtVToMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x16.ToBits", simdCvtMaskToV(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
- addF(simdPackage, "Mask16x32.StoreToBits", simdStoreMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask16x32FromBits", simdCvtVToMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask16x32.ToBits", simdCvtMaskToV(16, 32), sys.AMD64)
addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
- addF(simdPackage, "Mask32x4.StoreToBits", simdStoreMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x4FromBits", simdCvtVToMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x4.ToBits", simdCvtMaskToV(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
- addF(simdPackage, "Mask32x8.StoreToBits", simdStoreMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x8FromBits", simdCvtVToMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x8.ToBits", simdCvtMaskToV(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
- addF(simdPackage, "Mask32x16.StoreToBits", simdStoreMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask32x16FromBits", simdCvtVToMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask32x16.ToBits", simdCvtMaskToV(32, 16), sys.AMD64)
addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x2.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
- addF(simdPackage, "Mask64x2.StoreToBits", simdStoreMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x2FromBits", simdCvtVToMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x2.ToBits", simdCvtMaskToV(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
- addF(simdPackage, "Mask64x4.StoreToBits", simdStoreMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x4FromBits", simdCvtVToMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x4.ToBits", simdCvtMaskToV(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
- addF(simdPackage, "Mask64x8.StoreToBits", simdStoreMask(64, 8), sys.AMD64)
addF(simdPackage, "Mask64x8FromBits", simdCvtVToMask(64, 8), sys.AMD64)
addF(simdPackage, "Mask64x8.ToBits", simdCvtMaskToV(64, 8), sys.AMD64)
}
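
The addF registrations above bind each simd method to a closure that emits the corresponding SSA op, following the same closure shape as simdCvtVToMask and the deleted simdLoadMask. The opLen2 helper they reference is not part of this diff; a minimal sketch of what such a helper looks like, under that assumption:

```go
// Assumed shape of the opLen2 helper referenced in the addF calls above: it
// returns an intrinsic builder emitting one two-operand SSA value of the
// given op and type. Illustrative only; the real helper is not in this diff.
func opLen2(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(op, t, args[0], args[1])
	}
}
```
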
addF(simdPackage, "{{.VectorCounterpart}}.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64)
addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64)
- addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
- addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
{{end}}
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
`
-const simdMaskFromBitsTemplate = `
-// Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower {{.Lanes}} bits of y are used.
-//
-// CPU Features: AVX512
-//go:noescape
-func Load{{.Name}}FromBits(y *uint64) {{.Name}}
-
-// StoreToBits stores a {{.Name}} as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower {{.Lanes}} bits of y are used.
-//
-// CPU Features: AVX512
-//go:noescape
-func (x {{.Name}}) StoreToBits(y *uint64)
-`
-
const simdMaskFromValTemplate = `
// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower {{.Lanes}} bits of y are used.
t := templateOf(simdTypesTemplates, "types_amd64")
loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
- maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64")
maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64")
buffer := new(bytes.Buffer)
}
}
} else {
- if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil {
- panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err))
- }
if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil {
panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err))
}
}
}
-func TestBitMaskLoad(t *testing.T) {
- if !simd.HasAVX512() {
- t.Skip("Test requires HasAVX512, not available on this hardware")
- return
- }
- var bits uint64 = 0b10
- results := [2]int64{}
- want := [2]int64{0, 6}
- m := simd.LoadMask64x2FromBits(&bits)
- simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
- for i := range 2 {
- if results[i] != want[i] {
- t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
- }
- }
-}
-
-func TestBitMaskStore(t *testing.T) {
- if !simd.HasAVX512() {
- t.Skip("Test requires HasAVX512, not available on this hardware")
- return
- }
- var want uint64 = 0b101
- var got uint64
- x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
- y := simd.LoadInt32x4Slice([]int32{5, 0, 5, 0})
- m := y.Greater(x)
- m.StoreToBits(&got)
- if got != want {
- t.Errorf("Result incorrect: want %b, got %b", want, got)
- }
-}
-
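The two deleted tests above exercised the pointer-based LoadMask…FromBits/StoreToBits API. The value-based Mask…FromBits/ToBits constructors registered earlier cover the same ground; a rough equivalent of the deleted load test written against them might look like the sketch below. The parameter type of Mask64x2FromBits is not shown in this diff, so the small-integer argument here is an assumption.

```go
// Sketch of the deleted TestBitMaskLoad rewritten against the value-based
// constructor; Mask64x2FromBits taking a small unsigned integer is an
// assumption, not confirmed by this diff.
func TestBitMaskFromBitsValue(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
	}
	results := [2]int64{}
	want := [2]int64{0, 6}
	m := simd.Mask64x2FromBits(0b10) // assumed signature
	simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
	for i := range 2 {
		if results[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
		}
	}
}
```
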
func TestBitMaskFromBits(t *testing.T) {
if !simd.HasAVX512() {
t.Skip("Test requires HasAVX512, not available on this hardware")
vals [16]int8
}
-// LoadMask8x16FromBits constructs a Mask8x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask8x16FromBits(y *uint64) Mask8x16
-
-// StoreToBits stores a Mask8x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask8x16) StoreToBits(y *uint64)
-
// Mask8x16FromBits constructs a Mask8x16 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 16 bits of y are used.
//
vals [8]int16
}
-// LoadMask16x8FromBits constructs a Mask16x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask16x8FromBits(y *uint64) Mask16x8
-
-// StoreToBits stores a Mask16x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask16x8) StoreToBits(y *uint64)
-
// Mask16x8FromBits constructs a Mask16x8 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 8 bits of y are used.
//
vals [4]int32
}
-// LoadMask32x4FromBits constructs a Mask32x4 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask32x4FromBits(y *uint64) Mask32x4
-
-// StoreToBits stores a Mask32x4 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask32x4) StoreToBits(y *uint64)
-
// Mask32x4FromBits constructs a Mask32x4 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 4 bits of y are used.
//
vals [2]int64
}
-// LoadMask64x2FromBits constructs a Mask64x2 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 2 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask64x2FromBits(y *uint64) Mask64x2
-
-// StoreToBits stores a Mask64x2 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 2 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask64x2) StoreToBits(y *uint64)
-
// Mask64x2FromBits constructs a Mask64x2 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 2 bits of y are used.
//
vals [32]int8
}
-// LoadMask8x32FromBits constructs a Mask8x32 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask8x32FromBits(y *uint64) Mask8x32
-
-// StoreToBits stores a Mask8x32 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask8x32) StoreToBits(y *uint64)
-
// Mask8x32FromBits constructs a Mask8x32 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 32 bits of y are used.
//
vals [16]int16
}
-// LoadMask16x16FromBits constructs a Mask16x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask16x16FromBits(y *uint64) Mask16x16
-
-// StoreToBits stores a Mask16x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask16x16) StoreToBits(y *uint64)
-
// Mask16x16FromBits constructs a Mask16x16 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 16 bits of y are used.
//
vals [8]int32
}
-// LoadMask32x8FromBits constructs a Mask32x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask32x8FromBits(y *uint64) Mask32x8
-
-// StoreToBits stores a Mask32x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask32x8) StoreToBits(y *uint64)
-
// Mask32x8FromBits constructs a Mask32x8 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 8 bits of y are used.
//
vals [4]int64
}
-// LoadMask64x4FromBits constructs a Mask64x4 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask64x4FromBits(y *uint64) Mask64x4
-
-// StoreToBits stores a Mask64x4 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask64x4) StoreToBits(y *uint64)
-
// Mask64x4FromBits constructs a Mask64x4 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 4 bits of y are used.
//
vals [64]int8
}
-// LoadMask8x64FromBits constructs a Mask8x64 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 64 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask8x64FromBits(y *uint64) Mask8x64
-
-// StoreToBits stores a Mask8x64 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 64 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask8x64) StoreToBits(y *uint64)
-
// Mask8x64FromBits constructs a Mask8x64 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 64 bits of y are used.
//
vals [32]int16
}
-// LoadMask16x32FromBits constructs a Mask16x32 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask16x32FromBits(y *uint64) Mask16x32
-
-// StoreToBits stores a Mask16x32 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask16x32) StoreToBits(y *uint64)
-
// Mask16x32FromBits constructs a Mask16x32 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 32 bits of y are used.
//
vals [16]int32
}
-// LoadMask32x16FromBits constructs a Mask32x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask32x16FromBits(y *uint64) Mask32x16
-
-// StoreToBits stores a Mask32x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask32x16) StoreToBits(y *uint64)
-
// Mask32x16FromBits constructs a Mask32x16 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 16 bits of y are used.
//
vals [8]int64
}
-// LoadMask64x8FromBits constructs a Mask64x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask64x8FromBits(y *uint64) Mask64x8
-
-// StoreToBits stores a Mask64x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask64x8) StoreToBits(y *uint64)
-
// Mask64x8FromBits constructs a Mask64x8 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 8 bits of y are used.
//