// go.dev/issue/46027: some imports are missing for this submodule.
"crypto/aes/_asm/gcm": true,
+ "crypto/aes/_asm/standard": true,
"crypto/internal/bigmod/_asm": true,
"crypto/internal/edwards25519/field/_asm": true,
"crypto/md5/_asm": true,
--- /dev/null
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+ "os"
+ "strings"
+
+ . "github.com/mmcloughlin/avo/build"
+ "github.com/mmcloughlin/avo/ir"
+ . "github.com/mmcloughlin/avo/operand"
+ . "github.com/mmcloughlin/avo/reg"
+)
+
+//go:generate go run . -out ../../asm_amd64.s -pkg aes
+
+func main() {
+ Package("crypto/aes")
+ ConstraintExpr("!purego")
+ encryptBlockAsm()
+ decryptBlockAsm()
+ expandKeyAsm()
+ _expand_key_128()
+ _expand_key_192a()
+ _expand_key_192b()
+ _expand_key_256a()
+ _expand_key_256b()
+ Generate()
+
+ var internalFunctions []string = []string{
+ "·_expand_key_128<>",
+ "·_expand_key_192a<>",
+ "·_expand_key_192b<>",
+ "·_expand_key_256a<>",
+ "·_expand_key_256b<>",
+ }
+ removePeskyUnicodeDot(internalFunctions, "../../asm_amd64.s")
+}
+
+// encryptBlockAsm emits the amd64 body of the Go function
+//
+//	func encryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
+//
+// The emitted code XORs the 16-byte block at src with the first 16 bytes at
+// xk, then runs a chain of AESENC instructions over successive 16-byte
+// entries of xk, finishes with AESENCLAST, and stores the result at dst.
+// The value of nr selects the entry point into the chain (see below).
+// NOTE(review): nr is presumably the AES round count (10, 12 or 14) — the
+// code itself only distinguishes nr < 12, nr == 12 and nr > 12.
+func encryptBlockAsm() {
+ Implement("encryptBlockAsm")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // Register assignment: CX = nr, AX = xk, DX = dst, BX = src.
+ Load(Param("nr"), RCX)
+ Load(Param("xk"), RAX)
+ Load(Param("dst"), RDX)
+ Load(Param("src"), RBX)
+ // X1 = first 16 bytes of xk, X0 = input block; AX then moves past that
+ // first entry and the two are XORed together.
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ MOVUPS(Mem{Base: BX}.Offset(0), X0)
+ ADDQ(Imm(16), RAX)
+ PXOR(X1, X0)
+ // CX = nr - 12: zero jumps to Lenc192, a borrow (nr below 12) jumps to
+ // Lenc128, anything else falls through into Lenc256.
+ SUBQ(Imm(12), RCX)
+ JE(LabelRef("Lenc192"))
+ JB(LabelRef("Lenc128"))
+
+ // Two extra rounds, then fall through into Lenc192.
+ Label("Lenc256")
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(16), X1)
+ AESENC(X1, X0)
+ ADDQ(Imm(32), RAX)
+
+ // Two extra rounds, then fall through into Lenc128.
+ Label("Lenc192")
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(16), X1)
+ AESENC(X1, X0)
+ ADDQ(Imm(32), RAX)
+
+ // Common tail: nine AESENC rounds (offsets 0..128) followed by the final
+ // AESENCLAST round (offset 144).
+ Label("Lenc128")
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(16), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(32), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(48), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(64), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(80), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(96), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(112), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(128), X1)
+ AESENC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(144), X1)
+ AESENCLAST(X1, X0)
+ // Write the 16-byte result to dst.
+ MOVUPS(X0, Mem{Base: DX}.Offset(0))
+ RET()
+}
+
+// decryptBlockAsm emits the amd64 body of the Go function
+//
+//	func decryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
+//
+// It has the same shape as encryptBlockAsm but uses AESDEC/AESDECLAST: the
+// 16-byte block at src is XORed with the first 16 bytes at xk, run through a
+// chain of AESDEC instructions over successive 16-byte entries of xk,
+// finished with AESDECLAST, and stored at dst. The value of nr selects the
+// entry point into the chain.
+// NOTE(review): xk is presumably the decryption schedule written by
+// expandKeyAsm's "dec" output — confirm against the caller.
+func decryptBlockAsm() {
+ Implement("decryptBlockAsm")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // Register assignment: CX = nr, AX = xk, DX = dst, BX = src.
+ Load(Param("nr"), RCX)
+ Load(Param("xk"), RAX)
+ Load(Param("dst"), RDX)
+ Load(Param("src"), RBX)
+
+ // X1 = first 16 bytes of xk, X0 = input block; AX then moves past that
+ // first entry and the two are XORed together.
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ MOVUPS(Mem{Base: BX}.Offset(0), X0)
+ ADDQ(Imm(16), RAX)
+ PXOR(X1, X0)
+ // CX = nr - 12: zero jumps to Ldec192, a borrow (nr below 12) jumps to
+ // Ldec128, anything else falls through into Ldec256.
+ SUBQ(Imm(12), RCX)
+ JE(LabelRef("Ldec192"))
+ JB(LabelRef("Ldec128"))
+
+ // Two extra rounds, then fall through into Ldec192.
+ Label("Ldec256")
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(16), X1)
+ AESDEC(X1, X0)
+ ADDQ(Imm(32), RAX)
+
+ // Two extra rounds, then fall through into Ldec128.
+ Label("Ldec192")
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(16), X1)
+ AESDEC(X1, X0)
+ ADDQ(Imm(32), RAX)
+
+ // Common tail: nine AESDEC rounds (offsets 0..128) followed by the final
+ // AESDECLAST round (offset 144).
+ Label("Ldec128")
+ MOVUPS(Mem{Base: AX}.Offset(0), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(16), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(32), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(48), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(64), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(80), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(96), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(112), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(128), X1)
+ AESDEC(X1, X0)
+ MOVUPS(Mem{Base: AX}.Offset(144), X1)
+ AESDECLAST(X1, X0)
+ // Write the 16-byte result to dst.
+ MOVUPS(X0, Mem{Base: DX}.Offset(0))
+ RET()
+}
+
+// expandKeyAsm emits the amd64 body of the Go function
+//
+//	func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
+//
+// Note that round keys are stored in uint128 format, not uint32.
+//
+// The prologue emitted here copies the first 16 bytes of key to enc, zeroes
+// X4 and dispatches on nr. The remainder of the routine is emitted by the
+// Lexp_* helpers below, each of which contributes one labelled section to
+// this same TEXT body; the order of those calls is therefore the order of
+// the sections in the output and determines which section each one falls
+// through into.
+func expandKeyAsm() {
+ Implement("expandKeyAsm")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // Register assignment: CX = nr, AX = key, BX = enc (write cursor),
+ // DX = dec (write cursor).
+ Load(Param("nr"), RCX)
+ Load(Param("key"), RAX)
+ Load(Param("enc"), RBX)
+ Load(Param("dec"), RDX)
+
+ MOVUPS(Mem{Base: AX}, X0)
+ Comment("enc")
+ MOVUPS(X0, Mem{Base: BX})
+ ADDQ(Imm(16), RBX)
+ PXOR(X4, X4) // _expand_key_* expect X4 to be zero
+ // nr == 12 jumps to Lexp_enc192, nr below 12 jumps to Lexp_enc128,
+ // anything else falls through into Lexp_enc256 (emitted next).
+ CMPL(ECX, Imm(12))
+ JE(LabelRef("Lexp_enc192"))
+ JB(LabelRef("Lexp_enc128"))
+
+ // Section order matters: Lexp_enc128 has no trailing jump and runs
+ // straight into Lexp_dec, which in turn runs into Lexp_dec_loop.
+ Lexp_enc256()
+ Lexp_enc192()
+ Lexp_enc128()
+ Lexp_dec()
+ Lexp_dec_loop()
+}
+
+// Lexp_enc256 emits the "Lexp_enc256" section of expandKeyAsm, reached by
+// fallthrough when nr is above 12.
+//
+// It copies key bytes 16..31 to enc via X2, then emits thirteen
+// AESKEYGENASSIST/CALL pairs: six rounds of (_expand_key_256a on X2,
+// _expand_key_256b on X0) with round constants 0x01, 0x02, ... 0x20, plus a
+// final _expand_key_256a with 0x40. It ends by jumping to Lexp_dec.
+func Lexp_enc256() {
+ Label("Lexp_enc256")
+ MOVUPS(Mem{Base: AX}.Offset(16), X2)
+ MOVUPS(X2, Mem{Base: BX})
+ ADDQ(Imm(16), RBX)
+
+ // rcon doubles every iteration: 0x01, 0x02, 0x04, 0x08, 0x10, 0x20.
+ var rcon uint64 = 1
+ for i := 0; i < 6; i++ {
+ AESKEYGENASSIST(Imm(rcon), X2, X1)
+ CALL(LabelRef("_expand_key_256a<>(SB)"))
+ AESKEYGENASSIST(Imm(rcon), X0, X1)
+ CALL(LabelRef("_expand_key_256b<>(SB)"))
+ rcon <<= 1
+ }
+ // Last step uses only the "a" helper.
+ AESKEYGENASSIST(Imm(0x40), X2, X1)
+ CALL(LabelRef("_expand_key_256a<>(SB)"))
+ JMP(LabelRef("Lexp_dec"))
+}
+
+// Lexp_enc192 emits the "Lexp_enc192" section of expandKeyAsm, reached when
+// nr equals 12.
+//
+// It loads the 8 key bytes at offset 16 into X2 and emits eight
+// AESKEYGENASSIST/CALL pairs with round constants 0x01, 0x02, ... 0x80,
+// alternating between _expand_key_192a (even iterations) and
+// _expand_key_192b (odd iterations). It ends by jumping to Lexp_dec.
+func Lexp_enc192() {
+ Label("Lexp_enc192")
+ MOVQ(Mem{Base: AX}.Offset(16), X2)
+
+ // rcon doubles every iteration: 0x01 through 0x80.
+ var rcon uint64 = 1
+ for i := 0; i < 8; i++ {
+ AESKEYGENASSIST(Imm(rcon), X2, X1)
+ if i%2 == 0 {
+ CALL(LabelRef("_expand_key_192a<>(SB)"))
+ } else {
+ CALL(LabelRef("_expand_key_192b<>(SB)"))
+ }
+ rcon <<= 1
+ }
+ JMP(LabelRef("Lexp_dec"))
+}
+
+// Lexp_enc128 emits the "Lexp_enc128" section of expandKeyAsm, reached when
+// nr is below 12.
+//
+// It emits ten AESKEYGENASSIST/CALL _expand_key_128 pairs: eight with the
+// doubling round constants 0x01 ... 0x80, then 0x1b and 0x36. There is no
+// trailing jump: execution continues into whatever section is emitted next,
+// which expandKeyAsm arranges to be Lexp_dec.
+func Lexp_enc128() {
+ Label("Lexp_enc128")
+ // rcon doubles every iteration: 0x01 through 0x80.
+ var rcon uint64 = 1
+ for i := 0; i < 8; i++ {
+ AESKEYGENASSIST(Imm(rcon), X0, X1)
+ CALL(LabelRef("_expand_key_128<>(SB)"))
+ rcon <<= 1
+ }
+ // The last two constants do not follow the doubling pattern, so they are
+ // emitted explicitly.
+ AESKEYGENASSIST(Imm(0x1b), X0, X1)
+ CALL(LabelRef("_expand_key_128<>(SB)"))
+ AESKEYGENASSIST(Imm(0x36), X0, X1)
+ CALL(LabelRef("_expand_key_128<>(SB)"))
+}
+
+// Lexp_dec emits the "Lexp_dec" section of expandKeyAsm, the start of the
+// "dec" output.
+//
+// BX is stepped back 16 bytes so it points at the last 16-byte entry written
+// to enc; that entry is copied unchanged to the first 16 bytes of dec, and
+// CX is decremented. No RET or jump is emitted: execution continues into the
+// next section, which expandKeyAsm arranges to be Lexp_dec_loop.
+func Lexp_dec() {
+ Label("Lexp_dec")
+ Comment("dec")
+ SUBQ(Imm(16), RBX)
+ MOVUPS(Mem{Base: BX}, X1)
+ MOVUPS(X1, Mem{Base: DX})
+ DECQ(RCX)
+}
+
+// Lexp_dec_loop emits the "Lexp_dec_loop" section and the epilogue of
+// expandKeyAsm.
+//
+// Each loop iteration reads the 16-byte enc entry just below BX, applies
+// AESIMC to it, and stores the result 16 bytes above DX; BX then moves down
+// and DX moves up by 16, and CX is decremented. The loop repeats until CX
+// reaches zero. Afterwards one more entry is copied from BX-16 to DX+16
+// without AESIMC, and RET ends the expandKeyAsm routine.
+func Lexp_dec_loop() {
+ Label("Lexp_dec_loop")
+ MOVUPS(Mem{Base: BX}.Offset(-16), X1)
+ AESIMC(X1, X0)
+ MOVUPS(X0, Mem{Base: DX}.Offset(16))
+ SUBQ(Imm(16), RBX)
+ ADDQ(Imm(16), RDX)
+ DECQ(RCX)
+ JNZ(LabelRef("Lexp_dec_loop"))
+ // Final entry is copied as-is (no AESIMC).
+ MOVUPS(Mem{Base: BX}.Offset(-16), X0)
+ MOVUPS(X0, Mem{Base: DX}.Offset(16))
+ RET()
+}
+
+// _expand_key_128 emits the helper routine _expand_key_128<>, which is
+// CALLed from expandKeyAsm and takes its arguments in registers rather than
+// on the stack.
+//
+// Register contract as used by the emitted code:
+//   - X0: updated in place and written out as the next 16-byte entry.
+//   - X1: AESKEYGENASSIST result set up by the caller; overwritten here.
+//   - X4: SHUFPS destination/scratch; expandKeyAsm zeroes it beforehand.
+//   - BX: output cursor; 16 bytes are stored at (BX) and BX advances by 16.
+func _expand_key_128() {
+ Function("_expand_key_128<>")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // 0xff selects dword 3 of X1 for all four lanes.
+ PSHUFD(Imm(0xff), X1, X1)
+ SHUFPS(Imm(0x10), X0, X4)
+ PXOR(X4, X0)
+ SHUFPS(Imm(0x8c), X0, X4)
+ PXOR(X4, X0)
+ PXOR(X1, X0)
+ MOVUPS(X0, Mem{Base: BX})
+ ADDQ(Imm(16), RBX)
+ RET()
+}
+
+// _expand_key_192a emits the helper routine _expand_key_192a<>, CALLed from
+// the Lexp_enc192 section on even steps. Arguments are passed in registers.
+//
+// Register contract as used by the emitted code:
+//   - X0, X2: running key state, both updated in place.
+//   - X1: AESKEYGENASSIST result set up by the caller; overwritten here.
+//   - X4: SHUFPS destination/scratch; expandKeyAsm zeroes it beforehand.
+//   - X3, X5, X6: scratch.
+//   - BX: output cursor; 32 bytes are stored at (BX) and BX advances by 32.
+func _expand_key_192a() {
+ Function("_expand_key_192a<>")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // 0x55 selects dword 1 of X1 for all four lanes.
+ PSHUFD(Imm(0x55), X1, X1)
+ SHUFPS(Imm(0x10), X0, X4)
+ PXOR(X4, X0)
+ SHUFPS(Imm(0x8c), X0, X4)
+ PXOR(X4, X0)
+ PXOR(X1, X0)
+
+ // Update X2 from the freshly computed X0; X6 keeps the old X2 for the
+ // first output entry below.
+ MOVAPS(X2, X5)
+ MOVAPS(X2, X6)
+ PSLLDQ(Imm(0x4), X5)
+ PSHUFD(Imm(0xff), X0, X3)
+ PXOR(X3, X2)
+ PXOR(X5, X2)
+
+ // Assemble and store two 16-byte entries from X6/X0 and X0/X2.
+ MOVAPS(X0, X1)
+ SHUFPS(Imm(0x44), X0, X6)
+ MOVUPS(X6, Mem{Base: BX})
+ SHUFPS(Imm(0x4e), X2, X1)
+ MOVUPS(X1, Mem{Base: BX}.Offset(16))
+ ADDQ(Imm(32), RBX)
+ RET()
+}
+
+// _expand_key_192b emits the helper routine _expand_key_192b<>, CALLed from
+// the Lexp_enc192 section on odd steps. Arguments are passed in registers.
+//
+// Register contract as used by the emitted code:
+//   - X0, X2: running key state, both updated in place.
+//   - X1: AESKEYGENASSIST result set up by the caller; overwritten here.
+//   - X4: SHUFPS destination/scratch; expandKeyAsm zeroes it beforehand.
+//   - X3, X5: scratch.
+//   - BX: output cursor; X0 (16 bytes) is stored at (BX) and BX advances
+//     by 16.
+func _expand_key_192b() {
+ Function("_expand_key_192b<>")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // 0x55 selects dword 1 of X1 for all four lanes.
+ PSHUFD(Imm(0x55), X1, X1)
+ SHUFPS(Imm(0x10), X0, X4)
+ PXOR(X4, X0)
+ SHUFPS(Imm(0x8c), X0, X4)
+ PXOR(X4, X0)
+ PXOR(X1, X0)
+
+ // Update X2 from the freshly computed X0.
+ MOVAPS(X2, X5)
+ PSLLDQ(Imm(0x4), X5)
+ PSHUFD(Imm(0xff), X0, X3)
+ PXOR(X3, X2)
+ PXOR(X5, X2)
+
+ MOVUPS(X0, Mem{Base: BX})
+ ADDQ(Imm(16), RBX)
+ RET()
+}
+
+// _expand_key_256a emits the helper routine _expand_key_256a<>, whose entire
+// body is a tail jump to _expand_key_128<>; the two helpers therefore share
+// one implementation and one register contract.
+func _expand_key_256a() {
+ Function("_expand_key_256a<>")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // Hack to get Avo to emit:
+ // JMP _expand_key_128<>(SB)
+ // The instruction is built by hand as an ir.Instruction instead of going
+ // through Avo's JMP builder.
+ Instruction(&ir.Instruction{
+ Opcode: "JMP",
+ Operands: []Op{
+ LabelRef("_expand_key_128<>(SB)"),
+ },
+ })
+}
+
+// _expand_key_256b emits the helper routine _expand_key_256b<>, CALLed from
+// the Lexp_enc256 section. It mirrors _expand_key_128 but operates on X2
+// instead of X0.
+//
+// Register contract as used by the emitted code:
+//   - X2: updated in place and written out as the next 16-byte entry.
+//   - X1: AESKEYGENASSIST result set up by the caller; overwritten here.
+//   - X4: SHUFPS destination/scratch; expandKeyAsm zeroes it beforehand.
+//   - BX: output cursor; 16 bytes are stored at (BX) and BX advances by 16.
+func _expand_key_256b() {
+ Function("_expand_key_256b<>")
+ Attributes(NOSPLIT)
+ AllocLocal(0)
+
+ // 0xaa selects dword 2 of X1 for all four lanes.
+ PSHUFD(Imm(0xaa), X1, X1)
+ SHUFPS(Imm(0x10), X2, X4)
+ PXOR(X4, X2)
+ SHUFPS(Imm(0x8c), X2, X4)
+ PXOR(X4, X2)
+ PXOR(X1, X2)
+
+ MOVUPS(X2, Mem{Base: BX})
+ ADDQ(Imm(16), RBX)
+ RET()
+}
+
+// ThatPeskyUnicodeDot is the middle dot character (U+00B7) that appears at the
+// start of TEXT symbol names in Go assembly.
+const ThatPeskyUnicodeDot = "\u00b7"
+
+// removePeskyUnicodeDot rewrites the file at target in place: every occurrence
+// of a name listed in internalFunctions is replaced by the same name with the
+// unicode dot removed, so that the corresponding TEXT directives can exist as
+// internal assembly functions.
+//
+// Why this is needed: Avo v0.6.0 cannot generate internal assembly functions.
+// In Go assembly the unicode dot tells the compiler to link a TEXT symbol to a
+// function in the current Go package (or another package if specified), and
+// Avo unconditionally prepends that dot to all TEXT symbols. Post-processing
+// the output is the only way to emit an internal function.
+//
+// A pending Avo PR would add internal functions:
+// https://github.com/mmcloughlin/avo/pull/443
+//
+// If it is merged, InternalFunction("NAME") should replace this hack for the
+// specified functions.
+//
+// Any failure to read or write target causes a panic.
+func removePeskyUnicodeDot(internalFunctions []string, target string) {
+	raw, err := os.ReadFile(target)
+	if err != nil {
+		panic(err)
+	}
+
+	text := string(raw)
+	for _, dotted := range internalFunctions {
+		plain := strings.ReplaceAll(dotted, ThatPeskyUnicodeDot, "")
+		text = strings.ReplaceAll(text, dotted, plain)
+	}
+
+	if err := os.WriteFile(target, []byte(text), 0644); err != nil {
+		panic(err)
+	}
+}
--- /dev/null
+module std/crypto/aes/_asm/standard
+
+go 1.24
+
+require github.com/mmcloughlin/avo v0.6.0
+
+require (
+ golang.org/x/mod v0.20.0 // indirect
+ golang.org/x/sync v0.8.0 // indirect
+ golang.org/x/tools v0.24.0 // indirect
+)
--- /dev/null
+github.com/mmcloughlin/avo v0.6.0 h1:QH6FU8SKoTLaVs80GA8TJuLNkUYl4VokHKlPhVDg4YY=
+github.com/mmcloughlin/avo v0.6.0/go.mod h1:8CoAGaCSYXtCPR+8y18Y9aB/kxb8JSS6FRI7mSkvD+8=
+golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0=
+golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
+golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24=
+golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ=
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run asm_amd64.go -out ../../asm_amd64.s -pkg aes. DO NOT EDIT.
//go:build !purego
#include "textflag.h"
-// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
-TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
- MOVQ nr+0(FP), CX
- MOVQ xk+8(FP), AX
- MOVQ dst+16(FP), DX
- MOVQ src+24(FP), BX
- MOVUPS 0(AX), X1
- MOVUPS 0(BX), X0
- ADDQ $16, AX
- PXOR X1, X0
- SUBQ $12, CX
- JE Lenc192
- JB Lenc128
-Lenc256:
- MOVUPS 0(AX), X1
+// func encryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
+// Requires: AES, SSE, SSE2
+TEXT ·encryptBlockAsm(SB), NOSPLIT, $0-32
+ MOVQ nr+0(FP), CX
+ MOVQ xk+8(FP), AX
+ MOVQ dst+16(FP), DX
+ MOVQ src+24(FP), BX
+ MOVUPS (AX), X1
+ MOVUPS (BX), X0
+ ADDQ $0x10, AX
+ PXOR X1, X0
+ SUBQ $0x0c, CX
+ JE Lenc192
+ JB Lenc128
+ MOVUPS (AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
- ADDQ $32, AX
+ ADDQ $0x20, AX
+
Lenc192:
- MOVUPS 0(AX), X1
+ MOVUPS (AX), X1
AESENC X1, X0
MOVUPS 16(AX), X1
AESENC X1, X0
- ADDQ $32, AX
+ ADDQ $0x20, AX
+
Lenc128:
- MOVUPS 0(AX), X1
- AESENC X1, X0
- MOVUPS 16(AX), X1
- AESENC X1, X0
- MOVUPS 32(AX), X1
- AESENC X1, X0
- MOVUPS 48(AX), X1
- AESENC X1, X0
- MOVUPS 64(AX), X1
- AESENC X1, X0
- MOVUPS 80(AX), X1
- AESENC X1, X0
- MOVUPS 96(AX), X1
- AESENC X1, X0
- MOVUPS 112(AX), X1
- AESENC X1, X0
- MOVUPS 128(AX), X1
- AESENC X1, X0
- MOVUPS 144(AX), X1
+ MOVUPS (AX), X1
+ AESENC X1, X0
+ MOVUPS 16(AX), X1
+ AESENC X1, X0
+ MOVUPS 32(AX), X1
+ AESENC X1, X0
+ MOVUPS 48(AX), X1
+ AESENC X1, X0
+ MOVUPS 64(AX), X1
+ AESENC X1, X0
+ MOVUPS 80(AX), X1
+ AESENC X1, X0
+ MOVUPS 96(AX), X1
+ AESENC X1, X0
+ MOVUPS 112(AX), X1
+ AESENC X1, X0
+ MOVUPS 128(AX), X1
+ AESENC X1, X0
+ MOVUPS 144(AX), X1
AESENCLAST X1, X0
- MOVUPS X0, 0(DX)
+ MOVUPS X0, (DX)
RET
-// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
-TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
- MOVQ nr+0(FP), CX
- MOVQ xk+8(FP), AX
- MOVQ dst+16(FP), DX
- MOVQ src+24(FP), BX
- MOVUPS 0(AX), X1
- MOVUPS 0(BX), X0
- ADDQ $16, AX
- PXOR X1, X0
- SUBQ $12, CX
- JE Ldec192
- JB Ldec128
-Ldec256:
- MOVUPS 0(AX), X1
+// func decryptBlockAsm(nr int, xk *uint32, dst *byte, src *byte)
+// Requires: AES, SSE, SSE2
+TEXT ·decryptBlockAsm(SB), NOSPLIT, $0-32
+ MOVQ nr+0(FP), CX
+ MOVQ xk+8(FP), AX
+ MOVQ dst+16(FP), DX
+ MOVQ src+24(FP), BX
+ MOVUPS (AX), X1
+ MOVUPS (BX), X0
+ ADDQ $0x10, AX
+ PXOR X1, X0
+ SUBQ $0x0c, CX
+ JE Ldec192
+ JB Ldec128
+ MOVUPS (AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
- ADDQ $32, AX
+ ADDQ $0x20, AX
+
Ldec192:
- MOVUPS 0(AX), X1
+ MOVUPS (AX), X1
AESDEC X1, X0
MOVUPS 16(AX), X1
AESDEC X1, X0
- ADDQ $32, AX
+ ADDQ $0x20, AX
+
Ldec128:
- MOVUPS 0(AX), X1
- AESDEC X1, X0
- MOVUPS 16(AX), X1
- AESDEC X1, X0
- MOVUPS 32(AX), X1
- AESDEC X1, X0
- MOVUPS 48(AX), X1
- AESDEC X1, X0
- MOVUPS 64(AX), X1
- AESDEC X1, X0
- MOVUPS 80(AX), X1
- AESDEC X1, X0
- MOVUPS 96(AX), X1
- AESDEC X1, X0
- MOVUPS 112(AX), X1
- AESDEC X1, X0
- MOVUPS 128(AX), X1
- AESDEC X1, X0
- MOVUPS 144(AX), X1
+ MOVUPS (AX), X1
+ AESDEC X1, X0
+ MOVUPS 16(AX), X1
+ AESDEC X1, X0
+ MOVUPS 32(AX), X1
+ AESDEC X1, X0
+ MOVUPS 48(AX), X1
+ AESDEC X1, X0
+ MOVUPS 64(AX), X1
+ AESDEC X1, X0
+ MOVUPS 80(AX), X1
+ AESDEC X1, X0
+ MOVUPS 96(AX), X1
+ AESDEC X1, X0
+ MOVUPS 112(AX), X1
+ AESDEC X1, X0
+ MOVUPS 128(AX), X1
+ AESDEC X1, X0
+ MOVUPS 144(AX), X1
AESDECLAST X1, X0
- MOVUPS X0, 0(DX)
+ MOVUPS X0, (DX)
RET
-// func expandKeyAsm(nr int, key *byte, enc, dec *uint32) {
-// Note that round keys are stored in uint128 format, not uint32
-TEXT ·expandKeyAsm(SB),NOSPLIT,$0
- MOVQ nr+0(FP), CX
- MOVQ key+8(FP), AX
- MOVQ enc+16(FP), BX
- MOVQ dec+24(FP), DX
+// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
+// Requires: AES, SSE, SSE2
+TEXT ·expandKeyAsm(SB), NOSPLIT, $0-32
+ MOVQ nr+0(FP), CX
+ MOVQ key+8(FP), AX
+ MOVQ enc+16(FP), BX
+ MOVQ dec+24(FP), DX
MOVUPS (AX), X0
+
// enc
- MOVUPS X0, (BX)
- ADDQ $16, BX
- PXOR X4, X4 // _expand_key_* expect X4 to be zero
- CMPL CX, $12
- JE Lexp_enc192
- JB Lexp_enc128
-Lexp_enc256:
- MOVUPS 16(AX), X2
- MOVUPS X2, (BX)
- ADDQ $16, BX
+ MOVUPS X0, (BX)
+ ADDQ $0x10, BX
+ PXOR X4, X4
+ CMPL CX, $0x0c
+ JE Lexp_enc192
+ JB Lexp_enc128
+ MOVUPS 16(AX), X2
+ MOVUPS X2, (BX)
+ ADDQ $0x10, BX
AESKEYGENASSIST $0x01, X2, X1
- CALL _expand_key_256a<>(SB)
+ CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x01, X0, X1
- CALL _expand_key_256b<>(SB)
+ CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x02, X2, X1
- CALL _expand_key_256a<>(SB)
+ CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x02, X0, X1
- CALL _expand_key_256b<>(SB)
+ CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x04, X2, X1
- CALL _expand_key_256a<>(SB)
+ CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x04, X0, X1
- CALL _expand_key_256b<>(SB)
+ CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x08, X2, X1
- CALL _expand_key_256a<>(SB)
+ CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x08, X0, X1
- CALL _expand_key_256b<>(SB)
+ CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x10, X2, X1
- CALL _expand_key_256a<>(SB)
+ CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x10, X0, X1
- CALL _expand_key_256b<>(SB)
+ CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x20, X2, X1
- CALL _expand_key_256a<>(SB)
+ CALL _expand_key_256a<>(SB)
AESKEYGENASSIST $0x20, X0, X1
- CALL _expand_key_256b<>(SB)
+ CALL _expand_key_256b<>(SB)
AESKEYGENASSIST $0x40, X2, X1
- CALL _expand_key_256a<>(SB)
- JMP Lexp_dec
+ CALL _expand_key_256a<>(SB)
+ JMP Lexp_dec
+
Lexp_enc192:
- MOVQ 16(AX), X2
+ MOVQ 16(AX), X2
AESKEYGENASSIST $0x01, X2, X1
- CALL _expand_key_192a<>(SB)
+ CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x02, X2, X1
- CALL _expand_key_192b<>(SB)
+ CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x04, X2, X1
- CALL _expand_key_192a<>(SB)
+ CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x08, X2, X1
- CALL _expand_key_192b<>(SB)
+ CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x10, X2, X1
- CALL _expand_key_192a<>(SB)
+ CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x20, X2, X1
- CALL _expand_key_192b<>(SB)
+ CALL _expand_key_192b<>(SB)
AESKEYGENASSIST $0x40, X2, X1
- CALL _expand_key_192a<>(SB)
+ CALL _expand_key_192a<>(SB)
AESKEYGENASSIST $0x80, X2, X1
- CALL _expand_key_192b<>(SB)
- JMP Lexp_dec
+ CALL _expand_key_192b<>(SB)
+ JMP Lexp_dec
+
Lexp_enc128:
AESKEYGENASSIST $0x01, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x02, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x04, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x08, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x10, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x20, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x40, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x80, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x1b, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
AESKEYGENASSIST $0x36, X0, X1
- CALL _expand_key_128<>(SB)
+ CALL _expand_key_128<>(SB)
+
Lexp_dec:
// dec
- SUBQ $16, BX
+ SUBQ $0x10, BX
MOVUPS (BX), X1
MOVUPS X1, (DX)
- DECQ CX
+ DECQ CX
+
Lexp_dec_loop:
MOVUPS -16(BX), X1
AESIMC X1, X0
MOVUPS X0, 16(DX)
- SUBQ $16, BX
- ADDQ $16, DX
- DECQ CX
- JNZ Lexp_dec_loop
+ SUBQ $0x10, BX
+ ADDQ $0x10, DX
+ DECQ CX
+ JNZ Lexp_dec_loop
MOVUPS -16(BX), X0
MOVUPS X0, 16(DX)
RET
-TEXT _expand_key_128<>(SB),NOSPLIT,$0
+// func _expand_key_128<>()
+// Requires: SSE, SSE2
+TEXT _expand_key_128<>(SB), NOSPLIT, $0
PSHUFD $0xff, X1, X1
SHUFPS $0x10, X0, X4
- PXOR X4, X0
+ PXOR X4, X0
SHUFPS $0x8c, X0, X4
- PXOR X4, X0
- PXOR X1, X0
+ PXOR X4, X0
+ PXOR X1, X0
MOVUPS X0, (BX)
- ADDQ $16, BX
+ ADDQ $0x10, BX
RET
-TEXT _expand_key_192a<>(SB),NOSPLIT,$0
+// func _expand_key_192a<>()
+// Requires: SSE, SSE2
+TEXT _expand_key_192a<>(SB), NOSPLIT, $0
PSHUFD $0x55, X1, X1
SHUFPS $0x10, X0, X4
- PXOR X4, X0
+ PXOR X4, X0
SHUFPS $0x8c, X0, X4
- PXOR X4, X0
- PXOR X1, X0
-
+ PXOR X4, X0
+ PXOR X1, X0
MOVAPS X2, X5
MOVAPS X2, X6
- PSLLDQ $0x4, X5
+ PSLLDQ $0x04, X5
PSHUFD $0xff, X0, X3
- PXOR X3, X2
- PXOR X5, X2
-
+ PXOR X3, X2
+ PXOR X5, X2
MOVAPS X0, X1
SHUFPS $0x44, X0, X6
MOVUPS X6, (BX)
SHUFPS $0x4e, X2, X1
MOVUPS X1, 16(BX)
- ADDQ $32, BX
+ ADDQ $0x20, BX
RET
-TEXT _expand_key_192b<>(SB),NOSPLIT,$0
+// func _expand_key_192b<>()
+// Requires: SSE, SSE2
+TEXT _expand_key_192b<>(SB), NOSPLIT, $0
PSHUFD $0x55, X1, X1
SHUFPS $0x10, X0, X4
- PXOR X4, X0
+ PXOR X4, X0
SHUFPS $0x8c, X0, X4
- PXOR X4, X0
- PXOR X1, X0
-
+ PXOR X4, X0
+ PXOR X1, X0
MOVAPS X2, X5
- PSLLDQ $0x4, X5
+ PSLLDQ $0x04, X5
PSHUFD $0xff, X0, X3
- PXOR X3, X2
- PXOR X5, X2
-
+ PXOR X3, X2
+ PXOR X5, X2
MOVUPS X0, (BX)
- ADDQ $16, BX
+ ADDQ $0x10, BX
RET
-TEXT _expand_key_256a<>(SB),NOSPLIT,$0
+// func _expand_key_256a<>()
+TEXT _expand_key_256a<>(SB), NOSPLIT, $0
JMP _expand_key_128<>(SB)
-TEXT _expand_key_256b<>(SB),NOSPLIT,$0
+// func _expand_key_256b<>()
+// Requires: SSE, SSE2
+TEXT _expand_key_256b<>(SB), NOSPLIT, $0
PSHUFD $0xaa, X1, X1
SHUFPS $0x10, X2, X4
- PXOR X4, X2
+ PXOR X4, X2
SHUFPS $0x8c, X2, X4
- PXOR X4, X2
- PXOR X1, X2
-
+ PXOR X4, X2
+ PXOR X1, X2
MOVUPS X2, (BX)
- ADDQ $16, BX
+ ADDQ $0x10, BX
RET
// See go.dev/issue/46027: some imports are missing for this submodule.
"crypto/aes/_asm/gcm": true,
+ "crypto/aes/_asm/standard": true,
"crypto/internal/bigmod/_asm": true,
"crypto/internal/edwards25519/field/_asm": true,
"crypto/md5/_asm": true,