]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: change AES op names and add missing size
author Junyang Shao <shaojunyang@google.com>
Mon, 17 Nov 2025 20:45:30 +0000 (20:45 +0000)
committer Junyang Shao <shaojunyang@google.com>
Mon, 17 Nov 2025 21:37:42 +0000 (13:37 -0800)
This CL renames AESEncryptRound and AESDecryptRound to
AESEncryptOneRound and AESDecryptOneRound.

This CL also adds the 512-bit versions of some AES instructions.

Change-Id: Ia851a008cce2145b1ff193a89e172862060a725d
Reviewed-on: https://go-review.googlesource.com/c/go/+/721280
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/_gen/simdgen/ops/Others/categories.yaml
src/simd/_gen/simdgen/ops/Others/go.yaml
src/simd/_gen/simdgen/types.yaml
src/simd/ops_amd64.go

index e2d6f6321bf4cd2c4ddde8f0f9310aba751819ba..82ec733cc0994124a5848b449b89010d09ca802a 100644 (file)
@@ -178,12 +178,16 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 
        case ssa.OpAMD64VAESDECLAST128,
                ssa.OpAMD64VAESDECLAST256,
+               ssa.OpAMD64VAESDECLAST512,
                ssa.OpAMD64VAESDEC128,
                ssa.OpAMD64VAESDEC256,
+               ssa.OpAMD64VAESDEC512,
                ssa.OpAMD64VAESENCLAST128,
                ssa.OpAMD64VAESENCLAST256,
+               ssa.OpAMD64VAESENCLAST512,
                ssa.OpAMD64VAESENC128,
                ssa.OpAMD64VAESENC256,
+               ssa.OpAMD64VAESENC512,
                ssa.OpAMD64VADDPS128,
                ssa.OpAMD64VADDPS256,
                ssa.OpAMD64VADDPS512,
index 4723546b12749ccc876855de4a444f5e0de58831..ea1094b80579591cf0a563201fb489e5da26a08b 100644 (file)
@@ -2,12 +2,16 @@
 
 (AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...)
 (AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...)
-(AESDecryptRoundUint8x16 ...) => (VAESDEC128 ...)
-(AESDecryptRoundUint8x32 ...) => (VAESDEC256 ...)
+(AESDecryptLastRoundUint8x64 ...) => (VAESDECLAST512 ...)
+(AESDecryptOneRoundUint8x16 ...) => (VAESDEC128 ...)
+(AESDecryptOneRoundUint8x32 ...) => (VAESDEC256 ...)
+(AESDecryptOneRoundUint8x64 ...) => (VAESDEC512 ...)
 (AESEncryptLastRoundUint8x16 ...) => (VAESENCLAST128 ...)
 (AESEncryptLastRoundUint8x32 ...) => (VAESENCLAST256 ...)
-(AESEncryptRoundUint8x16 ...) => (VAESENC128 ...)
-(AESEncryptRoundUint8x32 ...) => (VAESENC256 ...)
+(AESEncryptLastRoundUint8x64 ...) => (VAESENCLAST512 ...)
+(AESEncryptOneRoundUint8x16 ...) => (VAESENC128 ...)
+(AESEncryptOneRoundUint8x32 ...) => (VAESENC256 ...)
+(AESEncryptOneRoundUint8x64 ...) => (VAESENC512 ...)
 (AESInvMixColumnsUint32x4 ...) => (VAESIMC128 ...)
 (AESRoundKeyGenAssistUint32x4 ...) => (VAESKEYGENASSIST128 ...)
 (AbsInt8x16 ...) => (VPABSB128 ...)
index 4f722f8a1106ff9c8d60dd2ab29e0b472fd8fc1e..674cfb19d6ae077fe526294affb5cdff11cf73d7 100644 (file)
@@ -28,12 +28,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESDEC512", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESDECLAST512", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESENC512", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VAESENCLAST512", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false},
index 15608e4fa6afea67cffadee17491cf74bf384794..b97e5f4f142529b96667489468b000e545cdfda8 100644 (file)
@@ -6,12 +6,16 @@ func simdGenericOps() []opData {
        return []opData{
                {name: "AESDecryptLastRoundUint8x16", argLength: 2, commutative: false},
                {name: "AESDecryptLastRoundUint8x32", argLength: 2, commutative: false},
-               {name: "AESDecryptRoundUint8x16", argLength: 2, commutative: false},
-               {name: "AESDecryptRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESDecryptLastRoundUint8x64", argLength: 2, commutative: false},
+               {name: "AESDecryptOneRoundUint8x16", argLength: 2, commutative: false},
+               {name: "AESDecryptOneRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESDecryptOneRoundUint8x64", argLength: 2, commutative: false},
                {name: "AESEncryptLastRoundUint8x16", argLength: 2, commutative: false},
                {name: "AESEncryptLastRoundUint8x32", argLength: 2, commutative: false},
-               {name: "AESEncryptRoundUint8x16", argLength: 2, commutative: false},
-               {name: "AESEncryptRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESEncryptLastRoundUint8x64", argLength: 2, commutative: false},
+               {name: "AESEncryptOneRoundUint8x16", argLength: 2, commutative: false},
+               {name: "AESEncryptOneRoundUint8x32", argLength: 2, commutative: false},
+               {name: "AESEncryptOneRoundUint8x64", argLength: 2, commutative: false},
                {name: "AESInvMixColumnsUint32x4", argLength: 1, commutative: false},
                {name: "AbsInt8x16", argLength: 1, commutative: false},
                {name: "AbsInt8x32", argLength: 1, commutative: false},
index 6bbc29dd126e1e37e1e0b6e4f50f5eaaa9363eb3..bd94b4d57641622a27367f704f110762632c8caa 100644 (file)
@@ -1269,12 +1269,16 @@ const (
        OpAMD64VADDSUBPS256
        OpAMD64VAESDEC128
        OpAMD64VAESDEC256
+       OpAMD64VAESDEC512
        OpAMD64VAESDECLAST128
        OpAMD64VAESDECLAST256
+       OpAMD64VAESDECLAST512
        OpAMD64VAESENC128
        OpAMD64VAESENC256
+       OpAMD64VAESENC512
        OpAMD64VAESENCLAST128
        OpAMD64VAESENCLAST256
+       OpAMD64VAESENCLAST512
        OpAMD64VAESIMC128
        OpAMD64VBROADCASTSD256
        OpAMD64VBROADCASTSD512
@@ -5950,12 +5954,16 @@ const (
        OpIsZeroVec
        OpAESDecryptLastRoundUint8x16
        OpAESDecryptLastRoundUint8x32
-       OpAESDecryptRoundUint8x16
-       OpAESDecryptRoundUint8x32
+       OpAESDecryptLastRoundUint8x64
+       OpAESDecryptOneRoundUint8x16
+       OpAESDecryptOneRoundUint8x32
+       OpAESDecryptOneRoundUint8x64
        OpAESEncryptLastRoundUint8x16
        OpAESEncryptLastRoundUint8x32
-       OpAESEncryptRoundUint8x16
-       OpAESEncryptRoundUint8x32
+       OpAESEncryptLastRoundUint8x64
+       OpAESEncryptOneRoundUint8x16
+       OpAESEncryptOneRoundUint8x32
+       OpAESEncryptOneRoundUint8x64
        OpAESInvMixColumnsUint32x4
        OpAbsInt8x16
        OpAbsInt8x32
@@ -20873,6 +20881,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VAESDEC512",
+               argLen: 2,
+               asm:    x86.AVAESDEC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VAESDECLAST128",
                argLen: 2,
@@ -20901,6 +20923,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VAESDECLAST512",
+               argLen: 2,
+               asm:    x86.AVAESDECLAST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VAESENC128",
                argLen: 2,
@@ -20929,6 +20965,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VAESENC512",
+               argLen: 2,
+               asm:    x86.AVAESENC,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VAESENCLAST128",
                argLen: 2,
@@ -20957,6 +21007,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VAESENCLAST512",
+               argLen: 2,
+               asm:    x86.AVAESENCLAST,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:   "VAESIMC128",
                argLen: 1,
@@ -85740,12 +85804,22 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "AESDecryptRoundUint8x16",
+               name:    "AESDecryptLastRoundUint8x64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESDecryptOneRoundUint8x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESDecryptOneRoundUint8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "AESDecryptRoundUint8x32",
+               name:    "AESDecryptOneRoundUint8x64",
                argLen:  2,
                generic: true,
        },
@@ -85760,12 +85834,22 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "AESEncryptRoundUint8x16",
+               name:    "AESEncryptLastRoundUint8x64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESEncryptOneRoundUint8x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "AESEncryptOneRoundUint8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "AESEncryptRoundUint8x32",
+               name:    "AESEncryptOneRoundUint8x64",
                argLen:  2,
                generic: true,
        },
index dff333337256cd881295a9b289c3cee17c587aba..042649f25627956968fe9801343fb59a014e41e0 100644 (file)
@@ -16,24 +16,36 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAESDecryptLastRoundUint8x32:
                v.Op = OpAMD64VAESDECLAST256
                return true
-       case OpAESDecryptRoundUint8x16:
+       case OpAESDecryptLastRoundUint8x64:
+               v.Op = OpAMD64VAESDECLAST512
+               return true
+       case OpAESDecryptOneRoundUint8x16:
                v.Op = OpAMD64VAESDEC128
                return true
-       case OpAESDecryptRoundUint8x32:
+       case OpAESDecryptOneRoundUint8x32:
                v.Op = OpAMD64VAESDEC256
                return true
+       case OpAESDecryptOneRoundUint8x64:
+               v.Op = OpAMD64VAESDEC512
+               return true
        case OpAESEncryptLastRoundUint8x16:
                v.Op = OpAMD64VAESENCLAST128
                return true
        case OpAESEncryptLastRoundUint8x32:
                v.Op = OpAMD64VAESENCLAST256
                return true
-       case OpAESEncryptRoundUint8x16:
+       case OpAESEncryptLastRoundUint8x64:
+               v.Op = OpAMD64VAESENCLAST512
+               return true
+       case OpAESEncryptOneRoundUint8x16:
                v.Op = OpAMD64VAESENC128
                return true
-       case OpAESEncryptRoundUint8x32:
+       case OpAESEncryptOneRoundUint8x32:
                v.Op = OpAMD64VAESENC256
                return true
+       case OpAESEncryptOneRoundUint8x64:
+               v.Op = OpAMD64VAESENC512
+               return true
        case OpAESInvMixColumnsUint32x4:
                v.Op = OpAMD64VAESIMC128
                return true
index 5c941321a4fc5e3a707a8bfd042f6dbed40b1db1..f3aa904f6c97a74584675719a942a8e9dc7eeacf 100644 (file)
@@ -14,12 +14,16 @@ const simdPackage = "simd"
 func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
        addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.AESDecryptOneRound", opLen2(ssa.OpAESDecryptOneRoundUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AESDecryptOneRound", opLen2(ssa.OpAESDecryptOneRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.AESDecryptOneRound", opLen2(ssa.OpAESDecryptOneRoundUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.AESEncryptOneRound", opLen2(ssa.OpAESEncryptOneRoundUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AESEncryptOneRound", opLen2(ssa.OpAESEncryptOneRoundUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.AESEncryptOneRound", opLen2(ssa.OpAESEncryptOneRoundUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.AESInvMixColumns", opLen1(ssa.OpAESInvMixColumnsUint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x4.AESRoundKeyGenAssist", opLen1Imm8(ssa.OpAESRoundKeyGenAssistUint32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64)
index 3c8befb82634b5a6d1a1074336f51017820ac8be..8ecf066e804599b1b0c499b881c75e93aef21a7d 100644 (file)
@@ -3,7 +3,7 @@
   commutative: false
   documentation: !string |-
     // NAME counts the leading zeros of each element in x.
-- go: AESEncryptRound
+- go: AESEncryptOneRound
   commutative: false
   documentation: !string |-
     // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
@@ -27,7 +27,7 @@
     // result[1] = SubWord(x[1])
     // result[2] = XOR(SubWord(RotWord(x[2])), r)
     // result[3] = SubWord(x[3])
-- go: AESDecryptRound
+- go: AESDecryptOneRound
   commutative: false
   documentation: !string |-
     // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197.
index 77b9fc378307e1e5b5f256301210aeb3a60d3619..f89d7ef82d40c1992b577e4806750bc8070c5e08 100644 (file)
@@ -6,7 +6,7 @@
     go: $t
   out:
   - *any
-- go: AESEncryptRound
+- go: AESEncryptOneRound
   asm: VAESENC
   in:
   - &uint8s
@@ -33,7 +33,7 @@
     name: rconVal
   out:
   - *uint32s
-- go: AESDecryptRound
+- go: AESDecryptOneRound
   asm: VAESDEC
   in:
   - *uint8s
index f7a01cb360d306b16e1ea72c8b46f2c460062c5e..9dccd1e76415a86c6317bbc1ffe3cde53eb0aaef 100644 (file)
@@ -83,6 +83,17 @@ in: !repeat
   - {class: vreg, go: Int64x4,    base: "int",   elemBits: 128, bits: 256, lanes: 4}
   - {class: vreg, go: Uint64x4,   base: "uint",  elemBits: 128, bits: 256, lanes: 4}
 
+# Special shapes just to make VAES(ENC|DEC)(LAST)?512 work.
+# The elemBits field of these shapes is wrong; it will be overwritten by overwriteElemBits.
+  - {class: vreg, go: Int8x32,    base: "int",   elemBits: 128, bits: 512, lanes: 32}
+  - {class: vreg, go: Uint8x32,   base: "uint",  elemBits: 128, bits: 512, lanes: 32}
+  - {class: vreg, go: Int16x16,   base: "int",   elemBits: 128, bits: 512, lanes: 16}
+  - {class: vreg, go: Uint16x16,  base: "uint",  elemBits: 128, bits: 512, lanes: 16}
+  - {class: vreg, go: Int32x8,    base: "int",   elemBits: 128, bits: 512, lanes: 8}
+  - {class: vreg, go: Uint32x8,   base: "uint",  elemBits: 128, bits: 512, lanes: 8}
+  - {class: vreg, go: Int64x4,    base: "int",   elemBits: 128, bits: 512, lanes: 4}
+  - {class: vreg, go: Uint64x4,   base: "uint",  elemBits: 128, bits: 512, lanes: 4}
+
   - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now.
 inVariant: !repeat
 - *types
index ee472d11637c54f9b8751abffd2cb2f8a0d4e5d6..88b951990c8c7dc6b19ae0547525cf03d0b338fc 100644 (file)
@@ -22,23 +22,39 @@ func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16
 // Asm: VAESDECLAST, CPU Feature: AVX512VAES
 func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32
 
-/* AESDecryptRound */
+// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y)
+//
+// Asm: VAESDECLAST, CPU Feature: AVX512VAES
+func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64
+
+/* AESDecryptOneRound */
 
-// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
 // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
 // y is the chunk of dw array in use.
 // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
 //
 // Asm: VAESDEC, CPU Feature: AVX, AES
-func (x Uint8x16) AESDecryptRound(y Uint32x4) Uint8x16
+func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16
+
+// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of dw array in use.
+// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
+//
+// Asm: VAESDEC, CPU Feature: AVX512VAES
+func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32
 
-// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
 // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
 // y is the chunk of dw array in use.
 // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y)
 //
 // Asm: VAESDEC, CPU Feature: AVX512VAES
-func (x Uint8x32) AESDecryptRound(y Uint32x8) Uint8x32
+func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64
 
 /* AESEncryptLastRound */
 
@@ -58,23 +74,39 @@ func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16
 // Asm: VAESENCLAST, CPU Feature: AVX512VAES
 func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32
 
-/* AESEncryptRound */
+// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey((ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENCLAST, CPU Feature: AVX512VAES
+func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64
+
+/* AESEncryptOneRound */
 
-// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
 // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
 // y is the chunk of w array in use.
 // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
 //
 // Asm: VAESENC, CPU Feature: AVX, AES
-func (x Uint8x16) AESEncryptRound(y Uint32x4) Uint8x16
+func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16
+
+// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
+// y is the chunk of w array in use.
+// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
+//
+// Asm: VAESENC, CPU Feature: AVX512VAES
+func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32
 
-// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
+// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197.
 // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33.
 // y is the chunk of w array in use.
 // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y)
 //
 // Asm: VAESENC, CPU Feature: AVX512VAES
-func (x Uint8x32) AESEncryptRound(y Uint32x8) Uint8x32
+func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64
 
 /* AESInvMixColumns */