]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: add AES instructions
authorJunyang Shao <shaojunyang@google.com>
Tue, 23 Sep 2025 05:16:30 +0000 (05:16 +0000)
committerJunyang Shao <shaojunyang@google.com>
Tue, 30 Sep 2025 17:37:49 +0000 (10:37 -0700)
AVXAES is a composite feature set: Intel listed it as a single "AVXAES"
entry in the XED data instead of separating it into AVX and AES.

The tests will be in the next CL.

Change-Id: I89c97261f2228b2fdafb48f63e82ef6239bdd5ca
Reviewed-on: https://go-review.googlesource.com/c/go/+/706055
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
15 files changed:
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/internal/cpu/cpu.go
src/internal/cpu/cpu_x86.go
src/simd/_gen/simdgen/gen_simdTypes.go
src/simd/_gen/simdgen/ops/Others/categories.yaml
src/simd/_gen/simdgen/ops/Others/go.yaml
src/simd/_gen/simdgen/xed.go
src/simd/cpu.go
src/simd/ops_amd64.go

index a4d24524357f391fe44140e555924e9203b43298..de9cad8a478a770162c3379b817392924b1b8642 100644 (file)
@@ -12,7 +12,8 @@ import (
 func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
        var p *obj.Prog
        switch v.Op {
-       case ssa.OpAMD64VPABSB128,
+       case ssa.OpAMD64VAESIMC128,
+               ssa.OpAMD64VPABSB128,
                ssa.OpAMD64VPABSB256,
                ssa.OpAMD64VPABSB512,
                ssa.OpAMD64VPABSW128,
@@ -148,7 +149,15 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VSQRTPD512:
                p = simdV11(s, v)
 
-       case ssa.OpAMD64VADDPS128,
+       case ssa.OpAMD64VAESDECLAST128,
+               ssa.OpAMD64VAESDECLAST256,
+               ssa.OpAMD64VAESDEC128,
+               ssa.OpAMD64VAESDEC256,
+               ssa.OpAMD64VAESENCLAST128,
+               ssa.OpAMD64VAESENCLAST256,
+               ssa.OpAMD64VAESENC128,
+               ssa.OpAMD64VAESENC256,
+               ssa.OpAMD64VADDPS128,
                ssa.OpAMD64VADDPS256,
                ssa.OpAMD64VADDPS512,
                ssa.OpAMD64VADDPD128,
@@ -917,7 +926,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPBLENDVB256:
                p = simdV31(s, v)
 
-       case ssa.OpAMD64VROUNDPS128,
+       case ssa.OpAMD64VAESKEYGENASSIST128,
+               ssa.OpAMD64VROUNDPS128,
                ssa.OpAMD64VROUNDPS256,
                ssa.OpAMD64VROUNDPD128,
                ssa.OpAMD64VROUNDPD256,
index 1eab8b5e6d6b146af9dc9909e46321338ab95623..d9229e958adac0b26d0925988a7a5a0089c0cfd3 100644 (file)
@@ -1,5 +1,15 @@
 // Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT.
 
+(AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...)
+(AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...)
+(AESDecryptRoundUint8x16 ...) => (VAESDEC128 ...)
+(AESDecryptRoundUint8x32 ...) => (VAESDEC256 ...)
+(AESEncryptLastRoundUint8x16 ...) => (VAESENCLAST128 ...)
+(AESEncryptLastRoundUint8x32 ...) => (VAESENCLAST256 ...)
+(AESEncryptRoundUint8x16 ...) => (VAESENC128 ...)
+(AESEncryptRoundUint8x32 ...) => (VAESENC256 ...)
+(AESInvMixColumnsUint32x4 ...) => (VAESIMC128 ...)
+(AESRoundKeyGenAssistUint32x4 ...) => (VAESKEYGENASSIST128 ...)
 (AbsInt8x16 ...) => (VPABSB128 ...)
 (AbsInt8x32 ...) => (VPABSB256 ...)
 (AbsInt8x64 ...) => (VPABSB512 ...)
index 5e1da3249fed10f15eaa26869f0a94565a524e21..680c576bb1468fa9764e7e28aa93873778cd6538 100644 (file)
@@ -21,6 +21,15 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VADDSUBPD256", argLength: 2, reg: v21, asm: "VADDSUBPD", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VBROADCASTSDMasked256", argLength: 2, reg: wkw, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -1084,6 +1093,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VSUBPSMasked128", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VSUBPSMasked256", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VAESKEYGENASSIST128", argLength: 1, reg: v11, asm: "VAESKEYGENASSIST", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VROUNDPS128", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VROUNDPS256", argLength: 1, reg: v11, asm: "VROUNDPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VROUNDPD128", argLength: 1, reg: v11, asm: "VROUNDPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
index aa088dbf0bf63b02629c96c816591b77a4a4f675..2e9f3ff1c49fd305e28e5f0d85a41f2fe222f7bf 100644 (file)
@@ -4,6 +4,15 @@ package main
 
 func simdGenericOps() []opData {
        return []opData{
+               {name: "AESDecryptLastRoundUint8x16", argLength: 2, commutative: false},
+               {name: "AESDecryptLastRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESDecryptRoundUint8x16", argLength: 2, commutative: false},
+               {name: "AESDecryptRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESEncryptLastRoundUint8x16", argLength: 2, commutative: false},
+               {name: "AESEncryptLastRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESEncryptRoundUint8x16", argLength: 2, commutative: false},
+               {name: "AESEncryptRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESInvMixColumnsUint32x4", argLength: 1, commutative: false},
                {name: "AbsInt8x16", argLength: 1, commutative: false},
                {name: "AbsInt8x32", argLength: 1, commutative: false},
                {name: "AbsInt8x64", argLength: 1, commutative: false},
@@ -1101,6 +1110,7 @@ func simdGenericOps() []opData {
                {name: "moveMaskedUint16x32", argLength: 2, commutative: false},
                {name: "moveMaskedUint32x16", argLength: 2, commutative: false},
                {name: "moveMaskedUint64x8", argLength: 2, commutative: false},
+               {name: "AESRoundKeyGenAssistUint32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat32x8", argLength: 1, commutative: false, aux: "UInt8"},
                {name: "CeilScaledFloat32x16", argLength: 1, commutative: false, aux: "UInt8"},
index 105d1a803c6c3f4aa1158c63a6eb31566e24993e..7e44a31956af342f77663778ed8f707f55762993 100644 (file)
@@ -1253,6 +1253,15 @@ const (
        OpAMD64VADDSUBPD256
        OpAMD64VADDSUBPS128
        OpAMD64VADDSUBPS256
+       OpAMD64VAESDEC128
+       OpAMD64VAESDEC256
+       OpAMD64VAESDECLAST128
+       OpAMD64VAESDECLAST256
+       OpAMD64VAESENC128
+       OpAMD64VAESENC256
+       OpAMD64VAESENCLAST128
+       OpAMD64VAESENCLAST256
+       OpAMD64VAESIMC128
        OpAMD64VBROADCASTSD256
        OpAMD64VBROADCASTSD512
        OpAMD64VBROADCASTSDMasked256
@@ -2316,6 +2325,7 @@ const (
        OpAMD64VSUBPSMasked128
        OpAMD64VSUBPSMasked256
        OpAMD64VSUBPSMasked512
+       OpAMD64VAESKEYGENASSIST128
        OpAMD64VROUNDPS128
        OpAMD64VROUNDPS256
        OpAMD64VROUNDPD128
@@ -5401,6 +5411,15 @@ const (
        OpCvtMask64x4to8
        OpCvtMask64x8to8
        OpIsZeroVec
+       OpAESDecryptLastRoundUint8x16
+       OpAESDecryptLastRoundUint8x32
+       OpAESDecryptRoundUint8x16
+       OpAESDecryptRoundUint8x32
+       OpAESEncryptLastRoundUint8x16
+       OpAESEncryptLastRoundUint8x32
+       OpAESEncryptRoundUint8x16
+       OpAESEncryptRoundUint8x32
+       OpAESInvMixColumnsUint32x4
        OpAbsInt8x16
        OpAbsInt8x32
        OpAbsInt8x64
@@ -6498,6 +6517,7 @@ const (
        OpmoveMaskedUint16x32
        OpmoveMaskedUint32x16
        OpmoveMaskedUint64x8
+       OpAESRoundKeyGenAssistUint32x4
        OpCeilScaledFloat32x4
        OpCeilScaledFloat32x8
        OpCeilScaledFloat32x16
@@ -20088,6 +20108,131 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VAESDEC128",
+               argLen: 2,
+               asm:    x86.AVAESDEC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:   "VAESDEC256",
+               argLen: 2,
+               asm:    x86.AVAESDEC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VAESDECLAST128",
+               argLen: 2,
+               asm:    x86.AVAESDECLAST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:   "VAESDECLAST256",
+               argLen: 2,
+               asm:    x86.AVAESDECLAST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VAESENC128",
+               argLen: 2,
+               asm:    x86.AVAESENC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:   "VAESENC256",
+               argLen: 2,
+               asm:    x86.AVAESENC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VAESENCLAST128",
+               argLen: 2,
+               asm:    x86.AVAESENCLAST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:   "VAESENCLAST256",
+               argLen: 2,
+               asm:    x86.AVAESENCLAST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:   "VAESIMC128",
+               argLen: 1,
+               asm:    x86.AVAESIMC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:   "VBROADCASTSD256",
                argLen: 1,
@@ -35714,6 +35859,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VAESKEYGENASSIST128",
+               auxType: auxUInt8,
+               argLen:  1,
+               asm:     x86.AVAESKEYGENASSIST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:    "VROUNDPS128",
                auxType: auxUInt8,
@@ -76061,6 +76220,51 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "AESDecryptLastRoundUint8x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESDecryptLastRoundUint8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESDecryptRoundUint8x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESDecryptRoundUint8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESEncryptLastRoundUint8x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESEncryptLastRoundUint8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESEncryptRoundUint8x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESEncryptRoundUint8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESInvMixColumnsUint32x4",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "AbsInt8x16",
                argLen:  1,
@@ -81810,6 +82014,12 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "AESRoundKeyGenAssistUint32x4",
+               auxType: auxUInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "CeilScaledFloat32x4",
                auxType: auxUInt8,
index bc611fc44c4293d3421ed6fa80ffee2b655be6dc..84bb4c1148705fbe801e9cb820ac8b50819cd780 100644 (file)
@@ -9,6 +9,36 @@ import "cmd/compile/internal/types"
 
 func rewriteValueAMD64(v *Value) bool {
        switch v.Op {
+       case OpAESDecryptLastRoundUint8x16:
+               v.Op = OpAMD64VAESDECLAST128
+               return true
+       case OpAESDecryptLastRoundUint8x32:
+               v.Op = OpAMD64VAESDECLAST256
+               return true
+       case OpAESDecryptRoundUint8x16:
+               v.Op = OpAMD64VAESDEC128
+               return true
+       case OpAESDecryptRoundUint8x32:
+               v.Op = OpAMD64VAESDEC256
+               return true
+       case OpAESEncryptLastRoundUint8x16:
+               v.Op = OpAMD64VAESENCLAST128
+               return true
+       case OpAESEncryptLastRoundUint8x32:
+               v.Op = OpAMD64VAESENCLAST256
+               return true
+       case OpAESEncryptRoundUint8x16:
+               v.Op = OpAMD64VAESENC128
+               return true
+       case OpAESEncryptRoundUint8x32:
+               v.Op = OpAMD64VAESENC256
+               return true
+       case OpAESInvMixColumnsUint32x4:
+               v.Op = OpAMD64VAESIMC128
+               return true
+       case OpAESRoundKeyGenAssistUint32x4:
+               v.Op = OpAMD64VAESKEYGENASSIST128
+               return true
        case OpAMD64ADCQ:
                return rewriteValueAMD64_OpAMD64ADCQ(v)
        case OpAMD64ADCQconst:
index a62b3882c38aee20dce92c3995fcada80c3f42c0..f2e82d234cd90f4a1e7fb113c550bc4884276dc2 100644 (file)
@@ -12,6 +12,16 @@ import (
 const simdPackage = "simd"
 
 func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
+       addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x16.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x16.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x16.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.AESInvMixColumns", opLen1(ssa.OpAESInvMixColumnsUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x4.AESRoundKeyGenAssist", opLen1Imm8(ssa.OpAESRoundKeyGenAssistUint32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Abs", opLen1(ssa.OpAbsInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Abs", opLen1(ssa.OpAbsInt8x64, types.TypeVec512), sys.AMD64)
index de27e89fc2fb2f5b4048c8930a7ef6625bfaa7dd..4dffeadb2288359c479226a03012a5a970adcd7a 100644 (file)
@@ -34,6 +34,7 @@ var X86 struct {
        HasAVX512DQ         bool
        HasAVX512VL         bool
        HasAVX512GFNI       bool
+       HasAVX512VAES       bool
        HasAVX512VNNI       bool
        HasAVX512VBMI       bool
        HasAVX512VBMI2      bool
index ef1874ad68c23e201c244625770e57fe01585203..4610ce807eea6a197110dd0cd58bd5b40997c0f6 100644 (file)
@@ -28,6 +28,7 @@ const (
        cpuid_AVX512VBMI2     = 1 << 6
        cpuid_SSSE3           = 1 << 9
        cpuid_AVX512GFNI      = 1 << 8
+       cpuid_AVX512VAES      = 1 << 9
        cpuid_AVX512VNNI      = 1 << 11
        cpuid_AVX512BITALG    = 1 << 12
        cpuid_FMA             = 1 << 12
@@ -182,6 +183,7 @@ func doinit() {
                X86.HasAVX512VPOPCNTDQ = isSet(ecx7, cpuid_AVX512VPOPCNTDQ)
                X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512VBMI)
                X86.HasAVX512VBMI2 = isSet(ecx7, cpuid_AVX512VBMI2)
+               X86.HasAVX512VAES = isSet(ecx7, cpuid_AVX512VAES)
                X86.HasAVX512VNNI = isSet(ecx7, cpuid_AVX512VNNI)
                X86.HasAVX512VPCLMULQDQ = isSet(ecx7, cpuid_AVX512VPCLMULQDQ)
                X86.HasAVX512VBMI = isSet(ecx7, cpuid_AVX512_VBMI)
index 8944c35cad715ad174f8c55a5eaacd6537400990..f13be87f7b153055c514783cb79e8ab2b451bfdb 100644 (file)
@@ -563,7 +563,10 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
        }
        featureSet := make(map[featureKey]struct{})
        for _, op := range ops {
-               featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{}
+               if !strings.Contains(op.CPUFeature, ",") {
+                       featureSet[featureKey{op.GoArch, op.CPUFeature}] = struct{}{}
+               }
+               // Don't generate feature checks for composite features.
        }
        features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
                if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
index 4489f4f403fe517b031ebbaa63b6856b03c35e67..dd922fb14b1990d8c7bfcc9bfc17fc8c4872cb3f 100644 (file)
@@ -3,3 +3,47 @@
   commutative: false
   documentation: !string |-
     // NAME counts the leading zeros of each element in x.
+- go: AESEncryptRound
+  commutative: false
+  documentation: !string |-
+    // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+    // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+    // y is the chunk of w array in use.
+    // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+- go: AESEncryptLastRound
+  commutative: false
+  documentation: !string |-
+    // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+    // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+    // y is the chunk of w array in use.
+    // result = AddRoundKey((ShiftRows(SubBytes(x))), y)
+- go: AESRoundKeyGenAssist
+  commutative: false
+  documentation: !string |-
+    // NAME performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
+    // x is an array of AES words, but only x[1] and x[3] are used.
+    // r is a value from the Rcon constant array.
+    // result[0] = SubWord(x[1])
+    // result[1] = XOR(SubWord(RotWord(x[1])), r)
+    // result[2] = SubWord(x[3])
+    // result[3] = XOR(SubWord(RotWord(x[3])), r)
+- go: AESDecryptRound
+  commutative: false
+  documentation: !string |-
+    // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+    // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+    // y is the chunk of dw array in use.
+    // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+- go: AESDecryptLastRound
+  commutative: false
+  documentation: !string |-
+    // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
+    // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+    // y is the chunk of dw array in use.
+    // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+- go: AESInvMixColumns
+  commutative: false
+  documentation: !string |-
+    // NAME performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
+    // x is the chunk of w array in use.
+    // result = InvMixColumns(x)
\ No newline at end of file
index a4fd87407b6ef236d215410f9c35d31497fc8ed6..0f8b7b43a268f19c577e766d46d8a24ce47fd18a 100644 (file)
@@ -6,3 +6,50 @@
     go: $t
   out:
   - *any
+- go: AESEncryptRound
+  asm: VAESENC
+  in:
+  - &uint8s
+    base: uint
+    overwriteElementBits: 8
+  - &uint32s
+    base: uint
+    overwriteElementBits: 32
+  out:
+  - *uint8s
+- go: AESEncryptLastRound
+  asm: VAESENCLAST
+  in:
+  - *uint8s
+  - *uint32s
+  out:
+  - *uint8s
+- go: AESRoundKeyGenAssist
+  asm: VAESKEYGENASSIST
+  in:
+  - *uint32s
+  - class: immediate
+    immOffset: 0
+    name: rconVal
+  out:
+  - *uint32s
+- go: AESDecryptRound
+  asm: VAESDEC
+  in:
+  - *uint8s
+  - *uint32s
+  out:
+  - *uint8s
+- go: AESDecryptLastRound
+  asm: VAESDECLAST
+  in:
+  - *uint8s
+  - *uint32s
+  out:
+  - *uint8s
+- go: AESInvMixColumns
+  asm: VAESIMC
+  in:
+  - *uint32s
+  out:
+  - *uint32s
\ No newline at end of file
index e521f0c8d440ae61aa50c02eee187aa42e38424f..1781f5c74d03a2ae683b849110e370f66f48899d 100644 (file)
@@ -770,6 +770,7 @@ var cpuFeatureMap = map[cpuFeatureKey]string{
        {"AVX", ""}:              "AVX",
        {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
        {"AVX2", ""}:             "AVX2",
+       {"AVXAES", ""}:           "AVX, AES",
 
        // AVX-512 foundational features. We combine all of these into one "AVX512" feature.
        {"AVX512EVEX", "AVX512F"}:  "AVX512",
@@ -786,6 +787,7 @@ var cpuFeatureMap = map[cpuFeatureKey]string{
        {"AVX512EVEX", "AVX512_VBMI"}:      "AVX512VBMI",
        {"AVX512EVEX", "AVX512_VNNI"}:      "AVX512VNNI",
        {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
+       {"AVX512EVEX", "AVX512_VAES"}:      "AVX512VAES",
 
        // AVX 10.2 (not yet supported)
        {"AVX512EVEX", "AVX10_2_RC"}: "ignore",
index cbde9a8e1ff2eba923eb59d997027328c52f2064..2837c76d32109ddc813337afce4884284c56517f 100644 (file)
@@ -51,6 +51,14 @@ func HasAVX512GFNI() bool {
        return cpu.X86.HasAVX512GFNI
 }
 
+// HasAVX512VAES returns whether the CPU supports the AVX512VAES feature.
+//
+// HasAVX512VAES is defined on all GOARCHes, but will only return true on
+// GOARCH amd64.
+func HasAVX512VAES() bool {
+       return cpu.X86.HasAVX512VAES
+}
+
 // HasAVX512VBMI returns whether the CPU supports the AVX512VBMI feature.
 //
 // HasAVX512VBMI is defined on all GOARCHes, but will only return true on
index 17f45e6bf5e6d987a6ba0cf04ecc3028134e81dc..8956c2e0772c0dc69c63b01237ece5f401c94f7d 100644 (file)
@@ -4,6 +4,102 @@
 
 package simd
 
+/* AESDecryptLastRound */
+
+// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+//
+// Asm: VAESDECLAST, CPU Feature: AVX, AES
+func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
+
+// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+//
+// Asm: VAESDECLAST, CPU Feature: AVX512VAES
+func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
+
+/* AESDecryptRound */
+
+// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+//
+// Asm: VAESDEC, CPU Feature: AVX, AES
+func (x Uint8x16) AESDecryptRound(y Uint32x4) Uint8x16
+
+// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+//
+// Asm: VAESDEC, CPU Feature: AVX512VAES
+func (x Uint8x32) AESDecryptRound(y Uint32x8) Uint8x32
+
+/* AESEncryptLastRound */
+
+// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENCLAST, CPU Feature: AVX, AES
+func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
+
+// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENCLAST, CPU Feature: AVX512VAES
+func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
+
+/* AESEncryptRound */
+
+// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENC, CPU Feature: AVX, AES
+func (x Uint8x16) AESEncryptRound(y Uint32x4) Uint8x16
+
+// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENC, CPU Feature: AVX512VAES
+func (x Uint8x32) AESEncryptRound(y Uint32x8) Uint8x32
+
+/* AESInvMixColumns */
+
+// AESInvMixColumns performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197.
+// x is the chunk of w array in use.
+// result = InvMixColumns(x)
+//
+// Asm: VAESIMC, CPU Feature: AVX, AES
+func (x Uint32x4) AESInvMixColumns() Uint32x4
+
+/* AESRoundKeyGenAssist */
+
+// AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197.
+// x is an array of AES words, but only x[1] and x[3] are used.
+// r is rconVal, a value from the Rcon constant array.
+// result[0] = SubWord(x[1])
+// result[1] = XOR(SubWord(RotWord(x[1])), r)
+// result[2] = SubWord(x[3])
+// result[3] = XOR(SubWord(RotWord(x[3])), r)
+//
+// rconVal results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES
+func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4
+
 /* Abs */
 
 // Abs computes the absolute value of each element.