func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
var p *obj.Prog
switch v.Op {
- case ssa.OpAMD64VPABSB128,
+ case ssa.OpAMD64VAESIMC128,
+ ssa.OpAMD64VPABSB128,
ssa.OpAMD64VPABSB256,
ssa.OpAMD64VPABSB512,
ssa.OpAMD64VPABSW128,
ssa.OpAMD64VSQRTPD512:
p = simdV11(s, v)
- case ssa.OpAMD64VADDPS128,
+ case ssa.OpAMD64VAESDECLAST128,
+ ssa.OpAMD64VAESDECLAST256,
+ ssa.OpAMD64VAESDEC128,
+ ssa.OpAMD64VAESDEC256,
+ ssa.OpAMD64VAESENCLAST128,
+ ssa.OpAMD64VAESENCLAST256,
+ ssa.OpAMD64VAESENC128,
+ ssa.OpAMD64VAESENC256,
+ ssa.OpAMD64VADDPS128,
ssa.OpAMD64VADDPS256,
ssa.OpAMD64VADDPS512,
ssa.OpAMD64VADDPD128,
ssa.OpAMD64VPBLENDVB256:
p = simdV31(s, v)
- case ssa.OpAMD64VROUNDPS128,
+ case ssa.OpAMD64VAESKEYGENASSIST128,
+ ssa.OpAMD64VROUNDPS128,
ssa.OpAMD64VROUNDPS256,
ssa.OpAMD64VROUNDPD128,
ssa.OpAMD64VROUNDPD256,
// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
+(AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...)
+(AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...)
+(AESDecryptRoundUint8x16 ...) => (VAESDEC128 ...)
+(AESDecryptRoundUint8x32 ...) => (VAESDEC256 ...)
+(AESEncryptLastRoundUint8x16 ...) => (VAESENCLAST128 ...)
+(AESEncryptLastRoundUint8x32 ...) => (VAESENCLAST256 ...)
+(AESEncryptRoundUint8x16 ...) => (VAESENC128 ...)
+(AESEncryptRoundUint8x32 ...) => (VAESENC256 ...)
+(AESInvMixColumnsUint32x4 ...) => (VAESIMC128 ...)
+(AESRoundKeyGenAssistUint32x4 ...) => (VAESKEYGENASSIST128 ...)
(AbsInt8x16 ...) => (VPABSB128 ...)
(AbsInt8x32 ...) => (VPABSB256 ...)
(AbsInt8x64 ...) => (VPABSB512 ...)
{name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VSUBPSMasked128", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VSUBPSMasked256", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VAESKEYGENASSIST128", argLength: 1, reg: v11, asm: "VAESKEYGENASSIST", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VROUNDPS128", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VROUNDPS256", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VROUNDPD128", argLength: 1, reg: v11, asm: "VROUNDPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
func simdGenericOps() []opData {
return []opData{
+ {name: "AESDecryptLastRoundUint8x16", argLength: 2, commutative: false},
+ {name: "AESDecryptLastRoundUint8x32", argLength: 2, commutative: false},
+ {name: "AESDecryptRoundUint8x16", argLength: 2, commutative: false},
+ {name: "AESDecryptRoundUint8x32", argLength: 2, commutative: false},
+ {name: "AESEncryptLastRoundUint8x16", argLength: 2, commutative: false},
+ {name: "AESEncryptLastRoundUint8x32", argLength: 2, commutative: false},
+ {name: "AESEncryptRoundUint8x16", argLength: 2, commutative: false},
+ {name: "AESEncryptRoundUint8x32", argLength: 2, commutative: false},
+ {name: "AESInvMixColumnsUint32x4", argLength: 1, commutative: false},
{name: "AbsInt8x16", argLength: 1, commutative: false},
{name: "AbsInt8x32", argLength: 1, commutative: false},
{name: "AbsInt8x64", argLength: 1, commutative: false},
{name: "moveMaskedUint16x32", argLength: 2, commutative: false},
{name: "moveMaskedUint32x16", argLength: 2, commutative: false},
{name: "moveMaskedUint64x8", argLength: 2, commutative: false},
+ {name: "AESRoundKeyGenAssistUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
{name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
{name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
{name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
OpAMD64VADDSUBPD256
OpAMD64VADDSUBPS128
OpAMD64VADDSUBPS256
+ OpAMD64VAESDEC128
+ OpAMD64VAESDEC256
+ OpAMD64VAESDECLAST128
+ OpAMD64VAESDECLAST256
+ OpAMD64VAESENC128
+ OpAMD64VAESENC256
+ OpAMD64VAESENCLAST128
+ OpAMD64VAESENCLAST256
+ OpAMD64VAESIMC128
OpAMD64VBROADCASTSD256
OpAMD64VBROADCASTSD512
OpAMD64VBROADCASTSDMasked256
OpAMD64VSUBPSMasked128
OpAMD64VSUBPSMasked256
OpAMD64VSUBPSMasked512
+ OpAMD64VAESKEYGENASSIST128
OpAMD64VROUNDPS128
OpAMD64VROUNDPS256
OpAMD64VROUNDPD128
OpCvtMask64x4to8
OpCvtMask64x8to8
OpIsZeroVec
+ OpAESDecryptLastRoundUint8x16
+ OpAESDecryptLastRoundUint8x32
+ OpAESDecryptRoundUint8x16
+ OpAESDecryptRoundUint8x32
+ OpAESEncryptLastRoundUint8x16
+ OpAESEncryptLastRoundUint8x32
+ OpAESEncryptRoundUint8x16
+ OpAESEncryptRoundUint8x32
+ OpAESInvMixColumnsUint32x4
OpAbsInt8x16
OpAbsInt8x32
OpAbsInt8x64
OpmoveMaskedUint16x32
OpmoveMaskedUint32x16
OpmoveMaskedUint64x8
+ OpAESRoundKeyGenAssistUint32x4
OpCeilScaledFloat32x4
OpCeilScaledFloat32x8
OpCeilScaledFloat32x16
},
},
},
+ {
+ name: "VAESDEC128",
+ argLen: 2,
+ asm: x86.AVAESDEC,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VAESDEC256",
+ argLen: 2,
+ asm: x86.AVAESDEC,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VAESDECLAST128",
+ argLen: 2,
+ asm: x86.AVAESDECLAST,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VAESDECLAST256",
+ argLen: 2,
+ asm: x86.AVAESDECLAST,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VAESENC128",
+ argLen: 2,
+ asm: x86.AVAESENC,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VAESENC256",
+ argLen: 2,
+ asm: x86.AVAESENC,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VAESENCLAST128",
+ argLen: 2,
+ asm: x86.AVAESENCLAST,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
+ {
+ name: "VAESENCLAST256",
+ argLen: 2,
+ asm: x86.AVAESENCLAST,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ outputs: []outputInfo{
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ },
+ },
+ },
+ {
+ name: "VAESIMC128",
+ argLen: 1,
+ asm: x86.AVAESIMC,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VBROADCASTSD256",
argLen: 1,
},
},
},
+ {
+ name: "VAESKEYGENASSIST128",
+ auxType: auxUInt8,
+ argLen: 1,
+ asm: x86.AVAESKEYGENASSIST,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VROUNDPS128",
auxType: auxUInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "AESDecryptLastRoundUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESDecryptLastRoundUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESDecryptRoundUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESDecryptRoundUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESEncryptLastRoundUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESEncryptLastRoundUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESEncryptRoundUint8x16",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESEncryptRoundUint8x32",
+ argLen: 2,
+ generic: true,
+ },
+ {
+ name: "AESInvMixColumnsUint32x4",
+ argLen: 1,
+ generic: true,
+ },
{
name: "AbsInt8x16",
argLen: 1,
argLen: 2,
generic: true,
},
+ {
+ name: "AESRoundKeyGenAssistUint32x4",
+ auxType: auxUInt8,
+ argLen: 1,
+ generic: true,
+ },
{
name: "CeilScaledFloat32x4",
auxType: auxUInt8,
func rewriteValueAMD64(v *Value) bool {
switch v.Op {
+ case OpAESDecryptLastRoundUint8x16:
+ v.Op = OpAMD64VAESDECLAST128
+ return true
+ case OpAESDecryptLastRoundUint8x32:
+ v.Op = OpAMD64VAESDECLAST256
+ return true
+ case OpAESDecryptRoundUint8x16:
+ v.Op = OpAMD64VAESDEC128
+ return true
+ case OpAESDecryptRoundUint8x32:
+ v.Op = OpAMD64VAESDEC256
+ return true
+ case OpAESEncryptLastRoundUint8x16:
+ v.Op = OpAMD64VAESENCLAST128
+ return true
+ case OpAESEncryptLastRoundUint8x32:
+ v.Op = OpAMD64VAESENCLAST256
+ return true
+ case OpAESEncryptRoundUint8x16:
+ v.Op = OpAMD64VAESENC128
+ return true
+ case OpAESEncryptRoundUint8x32:
+ v.Op = OpAMD64VAESENC256
+ return true
+ case OpAESInvMixColumnsUint32x4:
+ v.Op = OpAMD64VAESIMC128
+ return true
+ case OpAESRoundKeyGenAssistUint32x4:
+ v.Op = OpAMD64VAESKEYGENASSIST128
+ return true
case OpAMD64ADCQ:
return rewriteValueAMD64_OpAMD64ADCQ(v)
case OpAMD64ADCQconst:
const simdPackage = "simd"
func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
+ addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x32.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x4.AESInvMixColumns", opLen1(ssa.OpAESInvMixColumnsUint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.AESRoundKeyGenAssist", opLen1Imm8(ssa.OpAESRoundKeyGenAssistUint32x4, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64)
HasAVX512DQ bool
HasAVX512VL bool
HasAVX512GFNI bool
+ HasAVX512VAES bool
HasAVX512VNNI bool
HasAVX512VBMI bool
HasAVX512VBMI2 bool
cpuid_AVX512VBMI2 = 1 << 6
cpuid_SSSE3 = 1 << 9
cpuid_AVX512GFNI = 1 << 8
+ cpuid_AVX512VAES = 1 << 9
cpuid_AVX512VNNI = 1 << 11
cpuid_AVX512BITALG = 1 << 12
cpuid_FMA = 1 << 12
X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI)
X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
+ X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
}
featureSet := make(map[featureKey]struct{})
for _, op := range ops {
- featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{}
+ if !strings.Contains(op.CPUFeature, ",") {
+ featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{}
+ }
+ // Don't generate feature checks for composite features.
}
features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
commutative: false
documentation: !string |-
// NAME counts the leading zeros of each element in x.
+- go: AESEncryptRound
+ commutative: false
+ documentation: !string |-
+ // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+ // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+ // y is the chunk of w array in use.
+ // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+- go: AESEncryptLastRound
+ commutative: false
+ documentation: !string |-
+ // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+ // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+ // y is the chunk of w array in use.
+ // result = AddRoundKey(ShiftRows(SubBytes(x)), y)
+- go: AESRoundKeyGenAssist
+ commutative: false
+ documentation: !string |-
+ // NAME performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
+ // x is an array of AES words, but only x[1] and x[3] are used.
+ // rconVal is a value from the Rcon constant array.
+ // result[0] = SubWord(x[1])
+ // result[1] = XOR(RotWord(SubWord(x[1])), rconVal)
+ // result[2] = SubWord(x[3])
+ // result[3] = XOR(RotWord(SubWord(x[3])), rconVal)
+- go: AESDecryptRound
+ commutative: false
+ documentation: !string |-
+ // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+ // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+ // y is the chunk of dw array in use.
+ // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+- go: AESDecryptLastRound
+ commutative: false
+ documentation: !string |-
+ // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+ // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+ // y is the chunk of dw array in use.
+ // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+- go: AESInvMixColumns
+ commutative: false
+ documentation: !string |-
+ // NAME performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
+ // x is the chunk of w array in use.
+ // result = InvMixColumns(x)
\ No newline at end of file
go: $t
out:
- *any
+- go: AESEncryptRound
+ asm: VAESENC
+ in:
+ - &uint8s
+ base: uint
+ overwriteElementBits: 8
+ - &uint32s
+ base: uint
+ overwriteElementBits: 32
+ out:
+ - *uint8s
+- go: AESEncryptLastRound
+ asm: VAESENCLAST
+ in:
+ - *uint8s
+ - *uint32s
+ out:
+ - *uint8s
+- go: AESRoundKeyGenAssist
+ asm: VAESKEYGENASSIST
+ in:
+ - *uint32s
+ - class: immediate
+ immOffset: 0
+ name: rconVal
+ out:
+ - *uint32s
+- go: AESDecryptRound
+ asm: VAESDEC
+ in:
+ - *uint8s
+ - *uint32s
+ out:
+ - *uint8s
+- go: AESDecryptLastRound
+ asm: VAESDECLAST
+ in:
+ - *uint8s
+ - *uint32s
+ out:
+ - *uint8s
+- go: AESInvMixColumns
+ asm: VAESIMC
+ in:
+ - *uint32s
+ out:
+ - *uint32s
\ No newline at end of file
{"AVX", ""}: "AVX",
{"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
{"AVX2", ""}: "AVX2",
+ {"AVXAES", ""}: "AVX, AES",
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
{"AVX512EVEX", "AVX512F"}: "AVX512",
{"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
{"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
{"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
+ {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
// AVX 10.2 (not yet supported)
{"AVX512EVEX", "AVX10_2_RC"}: "ignore",
return cpu.X86.HasAVX512GFNI
}
+// HasAVX512VAES returns whether the CPU supports the AVX512VAES feature.
+//
+// HasAVX512VAES is defined on all GOARCHes, but will only return true on
+// GOARCH amd64.
+func HasAVX512VAES() bool {
+ return cpu.X86.HasAVX512VAES
+}
+
// HasAVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
//
// HasAVX512VBMI is defined on all GOARCHes, but will only return true on
package simd
+/* AESDecryptLastRound */
+
+// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+//
+// Asm: VAESDECLAST, CPU Feature: AVX, AES
+func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
+
+// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+//
+// Asm: VAESDECLAST, CPU Feature: AVX512VAES
+func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
+
+/* AESDecryptRound */
+
+// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+//
+// Asm: VAESDEC, CPU Feature: AVX, AES
+func (x Uint8x16) AESDecryptRound(y Uint32x4) Uint8x16
+
+// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+//
+// Asm: VAESDEC, CPU Feature: AVX512VAES
+func (x Uint8x32) AESDecryptRound(y Uint32x8) Uint8x32
+
+/* AESEncryptLastRound */
+
+// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
+//
+// Asm: VAESENCLAST, CPU Feature: AVX, AES
+func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
+
+// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(ShiftRows(SubBytes(x)), y)
+//
+// Asm: VAESENCLAST, CPU Feature: AVX512VAES
+func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
+
+/* AESEncryptRound */
+
+// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENC, CPU Feature: AVX, AES
+func (x Uint8x16) AESEncryptRound(y Uint32x4) Uint8x16
+
+// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENC, CPU Feature: AVX512VAES
+func (x Uint8x32) AESEncryptRound(y Uint32x8) Uint8x32
+
+/* AESInvMixColumns */
+
+// AESInvMixColumns performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
+// x is the chunk of w array in use.
+// result = InvMixColumns(x)
+//
+// Asm: VAESIMC, CPU Feature: AVX, AES
+func (x Uint32x4) AESInvMixColumns() Uint32x4
+
+/* AESRoundKeyGenAssist */
+
+// AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
+// x is an array of AES words, but only x[1] and x[3] are used.
+// rconVal is a value from the Rcon constant array.
+// result[0] = SubWord(x[1])
+// result[1] = XOR(RotWord(SubWord(x[1])), rconVal)
+// result[2] = SubWord(x[3])
+// result[3] = XOR(RotWord(SubWord(x[3])), rconVal)
+//
+// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table.
+//
+// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
+func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
+
/* Abs */
// Abs computes the absolute value of each element.