From: Junyang Shao Date: Mon, 17 Nov 2025 20:45:30 +0000 (+0000) Subject: [dev.simd] cmd/compile, simd: change AES op names and add missing size X-Git-Tag: go1.26rc1~147^2~21 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0978935a99;p=gostls13.git [dev.simd] cmd/compile, simd: change AES op names and add missing size This CL changed AESEncryptRound and AESDecryptRound to AESEncryptOneRound and AESDecryptOneRound. This CL also adds the 512-bit version of some AES instructions. Change-Id: Ia851a008cce2145b1ff193a89e172862060a725d Reviewed-on: https://go-review.googlesource.com/c/go/+/721280 LUCI-TryBot-Result: Go LUCI Reviewed-by: David Chase --- diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index e2d6f6321b..82ec733cc0 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -178,12 +178,16 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { case ssa.OpAMD64VAESDECLAST128, ssa.OpAMD64VAESDECLAST256, + ssa.OpAMD64VAESDECLAST512, ssa.OpAMD64VAESDEC128, ssa.OpAMD64VAESDEC256, + ssa.OpAMD64VAESDEC512, ssa.OpAMD64VAESENCLAST128, ssa.OpAMD64VAESENCLAST256, + ssa.OpAMD64VAESENCLAST512, ssa.OpAMD64VAESENC128, ssa.OpAMD64VAESENC256, + ssa.OpAMD64VAESENC512, ssa.OpAMD64VADDPS128, ssa.OpAMD64VADDPS256, ssa.OpAMD64VADDPS512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 4723546b12..ea1094b805 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -2,12 +2,16 @@ (AESDecryptLastRoundUint8x16 ...) => (VAESDECLAST128 ...) (AESDecryptLastRoundUint8x32 ...) => (VAESDECLAST256 ...) -(AESDecryptRoundUint8x16 ...) => (VAESDEC128 ...) -(AESDecryptRoundUint8x32 ...) => (VAESDEC256 ...) +(AESDecryptLastRoundUint8x64 ...) => (VAESDECLAST512 ...) +(AESDecryptOneRoundUint8x16 ...) => (VAESDEC128 ...) 
+(AESDecryptOneRoundUint8x32 ...) => (VAESDEC256 ...) +(AESDecryptOneRoundUint8x64 ...) => (VAESDEC512 ...) (AESEncryptLastRoundUint8x16 ...) => (VAESENCLAST128 ...) (AESEncryptLastRoundUint8x32 ...) => (VAESENCLAST256 ...) -(AESEncryptRoundUint8x16 ...) => (VAESENC128 ...) -(AESEncryptRoundUint8x32 ...) => (VAESENC256 ...) +(AESEncryptLastRoundUint8x64 ...) => (VAESENCLAST512 ...) +(AESEncryptOneRoundUint8x16 ...) => (VAESENC128 ...) +(AESEncryptOneRoundUint8x32 ...) => (VAESENC256 ...) +(AESEncryptOneRoundUint8x64 ...) => (VAESENC512 ...) (AESInvMixColumnsUint32x4 ...) => (VAESIMC128 ...) (AESRoundKeyGenAssistUint32x4 ...) => (VAESKEYGENASSIST128 ...) (AbsInt8x16 ...) => (VPABSB128 ...) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 4f722f8a11..674cfb19d6 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -28,12 +28,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VAESDEC128", argLength: 2, reg: v21, asm: "VAESDEC", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VAESDEC256", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESDEC512", argLength: 2, reg: w21, asm: "VAESDEC", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESDECLAST128", argLength: 2, reg: v21, asm: "VAESDECLAST", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VAESDECLAST256", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESDECLAST512", argLength: 2, reg: w21, asm: "VAESDECLAST", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESENC128", argLength: 2, reg: v21, asm: "VAESENC", commutative: 
false, typ: "Vec128", resultInArg0: false}, {name: "VAESENC256", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESENC512", argLength: 2, reg: w21, asm: "VAESENC", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESENCLAST128", argLength: 2, reg: v21, asm: "VAESENCLAST", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VAESENCLAST256", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VAESENCLAST512", argLength: 2, reg: w21, asm: "VAESENCLAST", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VAESIMC128", argLength: 1, reg: v11, asm: "VAESIMC", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VBROADCASTSD256", argLength: 1, reg: v11, asm: "VBROADCASTSD", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VBROADCASTSD512", argLength: 1, reg: w11, asm: "VBROADCASTSD", commutative: false, typ: "Vec512", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index 15608e4fa6..b97e5f4f14 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -6,12 +6,16 @@ func simdGenericOps() []opData { return []opData{ {name: "AESDecryptLastRoundUint8x16", argLength: 2, commutative: false}, {name: "AESDecryptLastRoundUint8x32", argLength: 2, commutative: false}, - {name: "AESDecryptRoundUint8x16", argLength: 2, commutative: false}, - {name: "AESDecryptRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESDecryptLastRoundUint8x64", argLength: 2, commutative: false}, + {name: "AESDecryptOneRoundUint8x16", argLength: 2, commutative: false}, + {name: "AESDecryptOneRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESDecryptOneRoundUint8x64", argLength: 2, commutative: false}, {name: "AESEncryptLastRoundUint8x16", 
argLength: 2, commutative: false}, {name: "AESEncryptLastRoundUint8x32", argLength: 2, commutative: false}, - {name: "AESEncryptRoundUint8x16", argLength: 2, commutative: false}, - {name: "AESEncryptRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESEncryptLastRoundUint8x64", argLength: 2, commutative: false}, + {name: "AESEncryptOneRoundUint8x16", argLength: 2, commutative: false}, + {name: "AESEncryptOneRoundUint8x32", argLength: 2, commutative: false}, + {name: "AESEncryptOneRoundUint8x64", argLength: 2, commutative: false}, {name: "AESInvMixColumnsUint32x4", argLength: 1, commutative: false}, {name: "AbsInt8x16", argLength: 1, commutative: false}, {name: "AbsInt8x32", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 6bbc29dd12..bd94b4d576 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1269,12 +1269,16 @@ const ( OpAMD64VADDSUBPS256 OpAMD64VAESDEC128 OpAMD64VAESDEC256 + OpAMD64VAESDEC512 OpAMD64VAESDECLAST128 OpAMD64VAESDECLAST256 + OpAMD64VAESDECLAST512 OpAMD64VAESENC128 OpAMD64VAESENC256 + OpAMD64VAESENC512 OpAMD64VAESENCLAST128 OpAMD64VAESENCLAST256 + OpAMD64VAESENCLAST512 OpAMD64VAESIMC128 OpAMD64VBROADCASTSD256 OpAMD64VBROADCASTSD512 @@ -5950,12 +5954,16 @@ const ( OpIsZeroVec OpAESDecryptLastRoundUint8x16 OpAESDecryptLastRoundUint8x32 - OpAESDecryptRoundUint8x16 - OpAESDecryptRoundUint8x32 + OpAESDecryptLastRoundUint8x64 + OpAESDecryptOneRoundUint8x16 + OpAESDecryptOneRoundUint8x32 + OpAESDecryptOneRoundUint8x64 OpAESEncryptLastRoundUint8x16 OpAESEncryptLastRoundUint8x32 - OpAESEncryptRoundUint8x16 - OpAESEncryptRoundUint8x32 + OpAESEncryptLastRoundUint8x64 + OpAESEncryptOneRoundUint8x16 + OpAESEncryptOneRoundUint8x32 + OpAESEncryptOneRoundUint8x64 OpAESInvMixColumnsUint32x4 OpAbsInt8x16 OpAbsInt8x32 @@ -20873,6 +20881,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VAESDEC512", + argLen: 2, + asm: 
x86.AVAESDEC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VAESDECLAST128", argLen: 2, @@ -20901,6 +20923,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VAESDECLAST512", + argLen: 2, + asm: x86.AVAESDECLAST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VAESENC128", argLen: 2, @@ -20929,6 +20965,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VAESENC512", + argLen: 2, + asm: x86.AVAESENC, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VAESENCLAST128", argLen: 2, @@ -20957,6 +21007,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + 
name: "VAESENCLAST512", + argLen: 2, + asm: x86.AVAESENCLAST, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VAESIMC128", argLen: 1, @@ -85740,12 +85804,22 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "AESDecryptRoundUint8x16", + name: "AESDecryptLastRoundUint8x64", + argLen: 2, + generic: true, + }, + { + name: "AESDecryptOneRoundUint8x16", + argLen: 2, + generic: true, + }, + { + name: "AESDecryptOneRoundUint8x32", argLen: 2, generic: true, }, { - name: "AESDecryptRoundUint8x32", + name: "AESDecryptOneRoundUint8x64", argLen: 2, generic: true, }, @@ -85760,12 +85834,22 @@ var opcodeTable = [...]opInfo{ generic: true, }, { - name: "AESEncryptRoundUint8x16", + name: "AESEncryptLastRoundUint8x64", + argLen: 2, + generic: true, + }, + { + name: "AESEncryptOneRoundUint8x16", + argLen: 2, + generic: true, + }, + { + name: "AESEncryptOneRoundUint8x32", argLen: 2, generic: true, }, { - name: "AESEncryptRoundUint8x32", + name: "AESEncryptOneRoundUint8x64", argLen: 2, generic: true, }, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index dff3333372..042649f256 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -16,24 +16,36 @@ func rewriteValueAMD64(v *Value) bool { case OpAESDecryptLastRoundUint8x32: v.Op = OpAMD64VAESDECLAST256 return true - case OpAESDecryptRoundUint8x16: + case OpAESDecryptLastRoundUint8x64: + v.Op = OpAMD64VAESDECLAST512 + return true + case 
OpAESDecryptOneRoundUint8x16: v.Op = OpAMD64VAESDEC128 return true - case OpAESDecryptRoundUint8x32: + case OpAESDecryptOneRoundUint8x32: v.Op = OpAMD64VAESDEC256 return true + case OpAESDecryptOneRoundUint8x64: + v.Op = OpAMD64VAESDEC512 + return true case OpAESEncryptLastRoundUint8x16: v.Op = OpAMD64VAESENCLAST128 return true case OpAESEncryptLastRoundUint8x32: v.Op = OpAMD64VAESENCLAST256 return true - case OpAESEncryptRoundUint8x16: + case OpAESEncryptLastRoundUint8x64: + v.Op = OpAMD64VAESENCLAST512 + return true + case OpAESEncryptOneRoundUint8x16: v.Op = OpAMD64VAESENC128 return true - case OpAESEncryptRoundUint8x32: + case OpAESEncryptOneRoundUint8x32: v.Op = OpAMD64VAESENC256 return true + case OpAESEncryptOneRoundUint8x64: + v.Op = OpAMD64VAESENC512 + return true case OpAESInvMixColumnsUint32x4: v.Op = OpAMD64VAESIMC128 return true diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 5c941321a4..f3aa904f6c 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -14,12 +14,16 @@ const simdPackage = "simd" func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) { addF(simdPackage, "Uint8x16.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AESDecryptRound", opLen2(ssa.OpAESDecryptRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AESDecryptLastRound", opLen2(ssa.OpAESDecryptLastRoundUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.AESDecryptOneRound", opLen2(ssa.OpAESDecryptOneRoundUint8x16, types.TypeVec128), sys.AMD64) + 
addF(simdPackage, "Uint8x32.AESDecryptOneRound", opLen2(ssa.OpAESDecryptOneRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AESDecryptOneRound", opLen2(ssa.OpAESDecryptOneRoundUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x32, types.TypeVec256), sys.AMD64) - addF(simdPackage, "Uint8x16.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.AESEncryptRound", opLen2(ssa.OpAESEncryptRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AESEncryptLastRound", opLen2(ssa.OpAESEncryptLastRoundUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x16.AESEncryptOneRound", opLen2(ssa.OpAESEncryptOneRoundUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.AESEncryptOneRound", opLen2(ssa.OpAESEncryptOneRoundUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.AESEncryptOneRound", opLen2(ssa.OpAESEncryptOneRoundUint8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint32x4.AESInvMixColumns", opLen1(ssa.OpAESInvMixColumnsUint32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint32x4.AESRoundKeyGenAssist", opLen1Imm8(ssa.OpAESRoundKeyGenAssistUint32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x16.Abs", opLen1(ssa.OpAbsInt8x16, types.TypeVec128), sys.AMD64) diff --git a/src/simd/_gen/simdgen/ops/Others/categories.yaml b/src/simd/_gen/simdgen/ops/Others/categories.yaml index 3c8befb826..8ecf066e80 100644 --- a/src/simd/_gen/simdgen/ops/Others/categories.yaml +++ b/src/simd/_gen/simdgen/ops/Others/categories.yaml @@ -3,7 +3,7 @@ commutative: false documentation: !string |- // NAME counts the leading zeros of each element in x. 
-- go: AESEncryptRound +- go: AESEncryptOneRound commutative: false documentation: !string |- // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -27,7 +27,7 @@ // result[1] = SubWord(x[1]) // result[2] = XOR(SubWord(RotWord(x[2])), r) // result[3] = SubWord(x[3]) -- go: AESDecryptRound +- go: AESDecryptOneRound commutative: false documentation: !string |- // NAME performs a series of operations in AES cipher algorithm defined in FIPS 197. diff --git a/src/simd/_gen/simdgen/ops/Others/go.yaml b/src/simd/_gen/simdgen/ops/Others/go.yaml index 77b9fc3783..f89d7ef82d 100644 --- a/src/simd/_gen/simdgen/ops/Others/go.yaml +++ b/src/simd/_gen/simdgen/ops/Others/go.yaml @@ -6,7 +6,7 @@ go: $t out: - *any -- go: AESEncryptRound +- go: AESEncryptOneRound asm: VAESENC in: - &uint8s @@ -33,7 +33,7 @@ name: rconVal out: - *uint32s -- go: AESDecryptRound +- go: AESDecryptOneRound asm: VAESDEC in: - *uint8s diff --git a/src/simd/_gen/simdgen/types.yaml b/src/simd/_gen/simdgen/types.yaml index f7a01cb360..9dccd1e764 100644 --- a/src/simd/_gen/simdgen/types.yaml +++ b/src/simd/_gen/simdgen/types.yaml @@ -83,6 +83,17 @@ in: !repeat - {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 256, lanes: 4} - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 256, lanes: 4} +# Special shapes just to make VAES(ENC|DEC)(LAST)?512 work. +# The elemBits field of these shapes is wrong; it is overwritten by overwriteElemBits. 
+ - {class: vreg, go: Int8x32, base: "int", elemBits: 128, bits: 512, lanes: 32} + - {class: vreg, go: Uint8x32, base: "uint", elemBits: 128, bits: 512, lanes: 32} + - {class: vreg, go: Int16x16, base: "int", elemBits: 128, bits: 512, lanes: 16} + - {class: vreg, go: Uint16x16, base: "uint", elemBits: 128, bits: 512, lanes: 16} + - {class: vreg, go: Int32x8, base: "int", elemBits: 128, bits: 512, lanes: 8} + - {class: vreg, go: Uint32x8, base: "uint", elemBits: 128, bits: 512, lanes: 8} + - {class: vreg, go: Int64x4, base: "int", elemBits: 128, bits: 512, lanes: 4} + - {class: vreg, go: Uint64x4, base: "uint", elemBits: 128, bits: 512, lanes: 4} + - {class: immediate, go: Immediate} # TODO: we only support imms that are not used as value -- usually as instruction semantic predicate like VPCMP as of now. inVariant: !repeat - *types diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index ee472d1163..88b951990c 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -22,23 +22,39 @@ func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16 // Asm: VAESDECLAST, CPU Feature: AVX512VAES func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32 -/* AESDecryptRound */ +// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +// +// Asm: VAESDECLAST, CPU Feature: AVX512VAES +func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64 + +/* AESDecryptOneRound */ -// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. // y is the chunk of dw array in use. 
// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) // // Asm: VAESDEC, CPU Feature: AVX, AES -func (x Uint8x16) AESDecryptRound(y Uint32x4) Uint8x16 +func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16 + +// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +// +// Asm: VAESDEC, CPU Feature: AVX512VAES +func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32 -// AESDecryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. // y is the chunk of dw array in use. // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) // // Asm: VAESDEC, CPU Feature: AVX512VAES -func (x Uint8x32) AESDecryptRound(y Uint32x8) Uint8x32 +func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64 /* AESEncryptLastRound */ @@ -58,23 +74,39 @@ func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16 // Asm: VAESENCLAST, CPU Feature: AVX512VAES func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32 -/* AESEncryptRound */ +// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey((ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENCLAST, CPU Feature: AVX512VAES +func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64 + +/* AESEncryptOneRound */ -// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. 
+// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. // y is the chunk of w array in use. // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) // // Asm: VAESENC, CPU Feature: AVX, AES -func (x Uint8x16) AESEncryptRound(y Uint32x4) Uint8x16 +func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16 + +// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENC, CPU Feature: AVX512VAES +func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32 -// AESEncryptRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. // x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. // y is the chunk of w array in use. // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) // // Asm: VAESENC, CPU Feature: AVX512VAES -func (x Uint8x32) AESEncryptRound(y Uint32x8) Uint8x32 +func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64 /* AESInvMixColumns */