// SIMD lowering rules
-// Mask loads
-(LoadMask8x16 <t> ptr mem) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask8x32 <t> ptr mem) => (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask8x64 <t> ptr mem) => (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask16x8 <t> ptr mem) => (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask16x16 <t> ptr mem) => (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask16x32 <t> ptr mem) => (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask32x4 <t> ptr mem) => (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask32x8 <t> ptr mem) => (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask32x16 <t> ptr mem) => (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask64x2 <t> ptr mem) => (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask64x4 <t> ptr mem) => (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask64x8 <t> ptr mem) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(StoreMask8x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
-(StoreMask8x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
-(StoreMask8x64 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
-
-(StoreMask16x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
-(StoreMask16x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
-(StoreMask16x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
-
-(StoreMask32x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
-(StoreMask32x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
-(StoreMask32x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
-
-(StoreMask64x2 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
-(StoreMask64x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
-(StoreMask64x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
-
-// TODO is this correct? Should we just do it all from 64-bits?
-
// Mask conversions
// integers to masks
(Cvt16toMask8x16 <t> x) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
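
For reference, the surviving conversion rule above lowers the same way the deleted LoadMask rules did: the rule generator turns it into a rewrite function in rewriteAMD64.go. A rough sketch of the function it would emit, mirroring the deleted LoadMask rewrites shown further down (illustrative only; the real code is produced by rulegen):

```go
// Sketch of the generated rewrite for the Cvt16toMask8x16 rule above.
func rewriteValueAMD64_OpCvt16toMask8x16(v *Value) bool {
	v_0 := v.Args[0]
	b := v.Block
	// match: (Cvt16toMask8x16 <t> x)
	// result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
	for {
		t := v.Type
		x := v_0
		v.reset(OpAMD64VPMOVMToVec8x16)
		v.Type = types.TypeVec128
		v0 := b.NewValue0(v.Pos, OpAMD64KMOVWk, t)
		v0.AddArg(x)
		v.AddArg(v0)
		return true
	}
}
```
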
{name: "PrefetchCacheStreamed", argLength: 2, hasSideEffects: true}, // Do non-temporal or streamed prefetch arg0 to cache. arg0=addr, arg1=memory.
// SIMD
- {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
- {name: "LoadMask8x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask8x32", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask8x64", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x8", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x32", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x4", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x8", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x2", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x4", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x8", argLength: 2}, // arg0 = ptr, arg1 = mem
-
- {name: "StoreMask8x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask8x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask8x64", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x2", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
// Convert integers to masks
{name: "Cvt16toMask8x16", argLength: 1}, // arg0 = integer mask value
OpPrefetchCache
OpPrefetchCacheStreamed
OpZeroSIMD
- OpLoadMask8x16
- OpLoadMask8x32
- OpLoadMask8x64
- OpLoadMask16x8
- OpLoadMask16x16
- OpLoadMask16x32
- OpLoadMask32x4
- OpLoadMask32x8
- OpLoadMask32x16
- OpLoadMask64x2
- OpLoadMask64x4
- OpLoadMask64x8
- OpStoreMask8x16
- OpStoreMask8x32
- OpStoreMask8x64
- OpStoreMask16x8
- OpStoreMask16x16
- OpStoreMask16x32
- OpStoreMask32x4
- OpStoreMask32x8
- OpStoreMask32x16
- OpStoreMask64x2
- OpStoreMask64x4
- OpStoreMask64x8
OpCvt16toMask8x16
OpCvt32toMask8x32
OpCvt64toMask8x64
argLen: 0,
generic: true,
},
- {
- name: "LoadMask8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "StoreMask8x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask8x32",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask8x64",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x32",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x4",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x2",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x4",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
{
name: "Cvt16toMask8x16",
argLen: 1,
return rewriteValueAMD64_OpLessUint8x64(v)
case OpLoad:
return rewriteValueAMD64_OpLoad(v)
- case OpLoadMask16x16:
- return rewriteValueAMD64_OpLoadMask16x16(v)
- case OpLoadMask16x32:
- return rewriteValueAMD64_OpLoadMask16x32(v)
- case OpLoadMask16x8:
- return rewriteValueAMD64_OpLoadMask16x8(v)
- case OpLoadMask32x16:
- return rewriteValueAMD64_OpLoadMask32x16(v)
- case OpLoadMask32x4:
- return rewriteValueAMD64_OpLoadMask32x4(v)
- case OpLoadMask32x8:
- return rewriteValueAMD64_OpLoadMask32x8(v)
- case OpLoadMask64x2:
- return rewriteValueAMD64_OpLoadMask64x2(v)
- case OpLoadMask64x4:
- return rewriteValueAMD64_OpLoadMask64x4(v)
- case OpLoadMask64x8:
- return rewriteValueAMD64_OpLoadMask64x8(v)
- case OpLoadMask8x16:
- return rewriteValueAMD64_OpLoadMask8x16(v)
- case OpLoadMask8x32:
- return rewriteValueAMD64_OpLoadMask8x32(v)
- case OpLoadMask8x64:
- return rewriteValueAMD64_OpLoadMask8x64(v)
case OpLoadMasked16:
return rewriteValueAMD64_OpLoadMasked16(v)
case OpLoadMasked32:
return true
case OpStore:
return rewriteValueAMD64_OpStore(v)
- case OpStoreMask16x16:
- return rewriteValueAMD64_OpStoreMask16x16(v)
- case OpStoreMask16x32:
- return rewriteValueAMD64_OpStoreMask16x32(v)
- case OpStoreMask16x8:
- return rewriteValueAMD64_OpStoreMask16x8(v)
- case OpStoreMask32x16:
- return rewriteValueAMD64_OpStoreMask32x16(v)
- case OpStoreMask32x4:
- return rewriteValueAMD64_OpStoreMask32x4(v)
- case OpStoreMask32x8:
- return rewriteValueAMD64_OpStoreMask32x8(v)
- case OpStoreMask64x2:
- return rewriteValueAMD64_OpStoreMask64x2(v)
- case OpStoreMask64x4:
- return rewriteValueAMD64_OpStoreMask64x4(v)
- case OpStoreMask64x8:
- return rewriteValueAMD64_OpStoreMask64x8(v)
- case OpStoreMask8x16:
- return rewriteValueAMD64_OpStoreMask8x16(v)
- case OpStoreMask8x32:
- return rewriteValueAMD64_OpStoreMask8x32(v)
- case OpStoreMask8x64:
- return rewriteValueAMD64_OpStoreMask8x64(v)
case OpStoreMasked16:
return rewriteValueAMD64_OpStoreMasked16(v)
case OpStoreMasked32:
}
return false
}
-func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x16 <t> ptr mem)
- // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x16)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x32 <t> ptr mem)
- // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x32)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x8 <t> ptr mem)
- // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x8)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x16 <t> ptr mem)
- // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x16)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x4 <t> ptr mem)
- // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x4)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x8 <t> ptr mem)
- // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x8)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x2 <t> ptr mem)
- // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x2)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x4 <t> ptr mem)
- // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x4)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x8 <t> ptr mem)
- // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x8)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x16 <t> ptr mem)
- // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x16)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x32 <t> ptr mem)
- // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x32)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x64 <t> ptr mem)
- // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x64)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
}
return false
}
-func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x32 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x4 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x2 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x4 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x32 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x64 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
}
}
-var loadMaskOpcodes = map[int]map[int]ssa.Op{
- 8: {16: ssa.OpLoadMask8x16, 32: ssa.OpLoadMask8x32, 64: ssa.OpLoadMask8x64},
- 16: {8: ssa.OpLoadMask16x8, 16: ssa.OpLoadMask16x16, 32: ssa.OpLoadMask16x32},
- 32: {4: ssa.OpLoadMask32x4, 8: ssa.OpLoadMask32x8, 16: ssa.OpLoadMask32x16},
- 64: {2: ssa.OpLoadMask64x2, 4: ssa.OpLoadMask64x4, 8: ssa.OpLoadMask64x8},
-}
-
var cvtVToMaskOpcodes = map[int]map[int]ssa.Op{
8: {16: ssa.OpCvt16toMask8x16, 32: ssa.OpCvt32toMask8x32, 64: ssa.OpCvt64toMask8x64},
16: {8: ssa.OpCvt8toMask16x8, 16: ssa.OpCvt16toMask16x16, 32: ssa.OpCvt32toMask16x32},
64: {2: ssa.OpCvtMask64x2to8, 4: ssa.OpCvtMask64x4to8, 8: ssa.OpCvtMask64x8to8},
}
-func simdLoadMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- op := loadMaskOpcodes[elemBits][lanes]
- if op == 0 {
- panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
- }
- return s.newValue2(op, types.TypeMask, args[0], s.mem())
- }
-}
-
-func simdStoreMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- opCodes := map[int]map[int]ssa.Op{
- 8: {16: ssa.OpStoreMask8x16, 32: ssa.OpStoreMask8x32, 64: ssa.OpStoreMask8x64},
- 16: {8: ssa.OpStoreMask16x8, 16: ssa.OpStoreMask16x16, 32: ssa.OpStoreMask16x32},
- 32: {4: ssa.OpStoreMask32x4, 8: ssa.OpStoreMask32x8, 16: ssa.OpStoreMask32x16},
- 64: {2: ssa.OpStoreMask64x2, 4: ssa.OpStoreMask64x4, 8: ssa.OpStoreMask64x8},
- }
- op := opCodes[elemBits][lanes]
- if op == 0 {
- panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
- }
- s.vars[memVar] = s.newValue3A(op, types.TypeMem, types.TypeMask, args[1], args[0], s.mem())
- return nil
- }
-}
-
func simdCvtVToMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
op := cvtVToMaskOpcodes[elemBits][lanes]
addF(simdPackage, "Int8x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
- addF(simdPackage, "Mask8x16.StoreToBits", simdStoreMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x16FromBits", simdCvtVToMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x16.ToBits", simdCvtMaskToV(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
- addF(simdPackage, "Mask8x32.StoreToBits", simdStoreMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x32FromBits", simdCvtVToMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x32.ToBits", simdCvtMaskToV(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x64.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
- addF(simdPackage, "Mask8x64.StoreToBits", simdStoreMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask8x64FromBits", simdCvtVToMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask8x64.ToBits", simdCvtMaskToV(8, 64), sys.AMD64)
addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
- addF(simdPackage, "Mask16x8.StoreToBits", simdStoreMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x8FromBits", simdCvtVToMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x8.ToBits", simdCvtMaskToV(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
- addF(simdPackage, "Mask16x16.StoreToBits", simdStoreMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x16FromBits", simdCvtVToMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x16.ToBits", simdCvtMaskToV(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
- addF(simdPackage, "Mask16x32.StoreToBits", simdStoreMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask16x32FromBits", simdCvtVToMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask16x32.ToBits", simdCvtMaskToV(16, 32), sys.AMD64)
addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
- addF(simdPackage, "Mask32x4.StoreToBits", simdStoreMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x4FromBits", simdCvtVToMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x4.ToBits", simdCvtMaskToV(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
- addF(simdPackage, "Mask32x8.StoreToBits", simdStoreMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x8FromBits", simdCvtVToMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x8.ToBits", simdCvtMaskToV(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
- addF(simdPackage, "Mask32x16.StoreToBits", simdStoreMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask32x16FromBits", simdCvtVToMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask32x16.ToBits", simdCvtMaskToV(32, 16), sys.AMD64)
addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x2.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
- addF(simdPackage, "Mask64x2.StoreToBits", simdStoreMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x2FromBits", simdCvtVToMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x2.ToBits", simdCvtMaskToV(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
- addF(simdPackage, "Mask64x4.StoreToBits", simdStoreMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x4FromBits", simdCvtVToMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x4.ToBits", simdCvtMaskToV(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
- addF(simdPackage, "Mask64x8.StoreToBits", simdStoreMask(64, 8), sys.AMD64)
addF(simdPackage, "Mask64x8FromBits", simdCvtVToMask(64, 8), sys.AMD64)
addF(simdPackage, "Mask64x8.ToBits", simdCvtMaskToV(64, 8), sys.AMD64)
}
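
The addF registrations above bind each simd method to a closure that emits the corresponding SSA op, following the same closure shape as simdCvtVToMask and the deleted simdLoadMask. The opLen2 helper they reference is not part of this diff; a minimal sketch of what such a helper looks like, under that assumption:

```go
// Assumed shape of the opLen2 helper referenced in the addF calls above: it
// returns an intrinsic builder emitting one two-operand SSA value of the
// given op and type. Illustrative only; the real helper is not in this diff.
func opLen2(op ssa.Op, t *types.Type) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
	return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
		return s.newValue2(op, t, args[0], args[1])
	}
}
```
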
addF(simdPackage, "{{.VectorCounterpart}}.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "{{.Name}}.And", opLen2(ssa.OpAnd{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64)
addF(simdPackage, "{{.Name}}.Or", opLen2(ssa.OpOr{{.ReshapedVectorWithAndOr}}, types.TypeVec{{.Size}}), sys.AMD64)
- addF(simdPackage, "Load{{.Name}}FromBits", simdLoadMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
- addF(simdPackage, "{{.Name}}.StoreToBits", simdStoreMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
addF(simdPackage, "{{.Name}}FromBits", simdCvtVToMask({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
addF(simdPackage, "{{.Name}}.ToBits", simdCvtMaskToV({{.ElemBits}}, {{.Lanes}}), sys.AMD64)
{{end}}
func (x {{.Name}}) Store(y *[{{.Lanes}}]{{.Base}})
`
-const simdMaskFromBitsTemplate = `
-// Load{{.Name}}FromBits constructs a {{.Name}} from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower {{.Lanes}} bits of y are used.
-//
-// CPU Features: AVX512
-//go:noescape
-func Load{{.Name}}FromBits(y *uint64) {{.Name}}
-
-// StoreToBits stores a {{.Name}} as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower {{.Lanes}} bits of y are used.
-//
-// CPU Features: AVX512
-//go:noescape
-func (x {{.Name}}) StoreToBits(y *uint64)
-`
-
const simdMaskFromValTemplate = `
// {{.Name}}FromBits constructs a {{.Name}} from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower {{.Lanes}} bits of y are used.
t := templateOf(simdTypesTemplates, "types_amd64")
loadStore := templateOf(simdLoadStoreTemplate, "loadstore_amd64")
maskedLoadStore := templateOf(simdMaskedLoadStoreTemplate, "maskedloadstore_amd64")
- maskFromBits := templateOf(simdMaskFromBitsTemplate, "maskFromBits_amd64")
maskFromVal := templateOf(simdMaskFromValTemplate, "maskFromVal_amd64")
buffer := new(bytes.Buffer)
}
}
} else {
- if err := maskFromBits.ExecuteTemplate(buffer, "maskFromBits_amd64", typeDef); err != nil {
- panic(fmt.Errorf("failed to execute maskFromBits template for type %s: %w", typeDef.Name, err))
- }
if err := maskFromVal.ExecuteTemplate(buffer, "maskFromVal_amd64", typeDef); err != nil {
panic(fmt.Errorf("failed to execute maskFromVal template for type %s: %w", typeDef.Name, err))
}
}
}
-func TestBitMaskLoad(t *testing.T) {
- if !simd.HasAVX512() {
- t.Skip("Test requires HasAVX512, not available on this hardware")
- return
- }
- var bits uint64 = 0b10
- results := [2]int64{}
- want := [2]int64{0, 6}
- m := simd.LoadMask64x2FromBits(&bits)
- simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
- for i := range 2 {
- if results[i] != want[i] {
- t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
- }
- }
-}
-
-func TestBitMaskStore(t *testing.T) {
- if !simd.HasAVX512() {
- t.Skip("Test requires HasAVX512, not available on this hardware")
- return
- }
- var want uint64 = 0b101
- var got uint64
- x := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4})
- y := simd.LoadInt32x4Slice([]int32{5, 0, 5, 0})
- m := y.Greater(x)
- m.StoreToBits(&got)
- if got != want {
- t.Errorf("Result incorrect: want %b, got %b", want, got)
- }
-}
-
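The two deleted tests above exercised the pointer-based LoadMask…FromBits/StoreToBits API. The value-based Mask…FromBits/ToBits constructors registered earlier cover the same ground; a rough equivalent of the deleted load test written against them might look like the sketch below. The parameter type of Mask64x2FromBits is not shown in this diff, so the small-integer argument here is an assumption.

```go
// Sketch of the deleted TestBitMaskLoad rewritten against the value-based
// constructor; Mask64x2FromBits taking a small unsigned integer is an
// assumption, not confirmed by this diff.
func TestBitMaskFromBitsValue(t *testing.T) {
	if !simd.HasAVX512() {
		t.Skip("Test requires HasAVX512, not available on this hardware")
	}
	results := [2]int64{}
	want := [2]int64{0, 6}
	m := simd.Mask64x2FromBits(0b10) // assumed signature
	simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results)
	for i := range 2 {
		if results[i] != want[i] {
			t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i])
		}
	}
}
```
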
func TestBitMaskFromBits(t *testing.T) {
if !simd.HasAVX512() {
t.Skip("Test requires HasAVX512, not available on this hardware")
vals [16]int8
}
-// LoadMask8x16FromBits constructs a Mask8x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask8x16FromBits(y *uint64) Mask8x16
-
-// StoreToBits stores a Mask8x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask8x16) StoreToBits(y *uint64)
-
// Mask8x16FromBits constructs a Mask8x16 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 16 bits of y are used.
//
vals [8]int16
}
-// LoadMask16x8FromBits constructs a Mask16x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask16x8FromBits(y *uint64) Mask16x8
-
-// StoreToBits stores a Mask16x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask16x8) StoreToBits(y *uint64)
-
// Mask16x8FromBits constructs a Mask16x8 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 8 bits of y are used.
//
vals [4]int32
}
-// LoadMask32x4FromBits constructs a Mask32x4 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask32x4FromBits(y *uint64) Mask32x4
-
-// StoreToBits stores a Mask32x4 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask32x4) StoreToBits(y *uint64)
-
// Mask32x4FromBits constructs a Mask32x4 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 4 bits of y are used.
//
vals [2]int64
}
-// LoadMask64x2FromBits constructs a Mask64x2 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 2 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask64x2FromBits(y *uint64) Mask64x2
-
-// StoreToBits stores a Mask64x2 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 2 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask64x2) StoreToBits(y *uint64)
-
// Mask64x2FromBits constructs a Mask64x2 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 2 bits of y are used.
//
vals [32]int8
}
-// LoadMask8x32FromBits constructs a Mask8x32 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask8x32FromBits(y *uint64) Mask8x32
-
-// StoreToBits stores a Mask8x32 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask8x32) StoreToBits(y *uint64)
-
// Mask8x32FromBits constructs a Mask8x32 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 32 bits of y are used.
//
vals [16]int16
}
-// LoadMask16x16FromBits constructs a Mask16x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask16x16FromBits(y *uint64) Mask16x16
-
-// StoreToBits stores a Mask16x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask16x16) StoreToBits(y *uint64)
-
// Mask16x16FromBits constructs a Mask16x16 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 16 bits of y are used.
//
vals [8]int32
}
-// LoadMask32x8FromBits constructs a Mask32x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask32x8FromBits(y *uint64) Mask32x8
-
-// StoreToBits stores a Mask32x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask32x8) StoreToBits(y *uint64)
-
// Mask32x8FromBits constructs a Mask32x8 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 8 bits of y are used.
//
vals [4]int64
}
-// LoadMask64x4FromBits constructs a Mask64x4 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask64x4FromBits(y *uint64) Mask64x4
-
-// StoreToBits stores a Mask64x4 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 4 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask64x4) StoreToBits(y *uint64)
-
// Mask64x4FromBits constructs a Mask64x4 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 4 bits of y are used.
//
vals [64]int8
}
-// LoadMask8x64FromBits constructs a Mask8x64 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 64 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask8x64FromBits(y *uint64) Mask8x64
-
-// StoreToBits stores a Mask8x64 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 64 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask8x64) StoreToBits(y *uint64)
-
// Mask8x64FromBits constructs a Mask8x64 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 64 bits of y are used.
//
vals [32]int16
}
-// LoadMask16x32FromBits constructs a Mask16x32 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask16x32FromBits(y *uint64) Mask16x32
-
-// StoreToBits stores a Mask16x32 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 32 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask16x32) StoreToBits(y *uint64)
-
// Mask16x32FromBits constructs a Mask16x32 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 32 bits of y are used.
//
vals [16]int32
}
-// LoadMask32x16FromBits constructs a Mask32x16 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask32x16FromBits(y *uint64) Mask32x16
-
-// StoreToBits stores a Mask32x16 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 16 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask32x16) StoreToBits(y *uint64)
-
// Mask32x16FromBits constructs a Mask32x16 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 16 bits of y are used.
//
vals [8]int64
}
-// LoadMask64x8FromBits constructs a Mask64x8 from a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func LoadMask64x8FromBits(y *uint64) Mask64x8
-
-// StoreToBits stores a Mask64x8 as a bitmap, where 1 means set for the indexed element, 0 means unset.
-// Only the lower 8 bits of y are used.
-//
-// CPU Features: AVX512
-//
-//go:noescape
-func (x Mask64x8) StoreToBits(y *uint64)
-
// Mask64x8FromBits constructs a Mask64x8 from a bitmap value, where 1 means set for the indexed element, 0 means unset.
// Only the lower 8 bits of y are used.
//