#undef ROUNDS
#undef KEY
#undef TMP
-#undef OUTPERM
-#undef OUTMASK
-#undef OUTHEAD
-#undef OUTTAIL
// CBC encrypt or decrypt
// R3 src
// Register usage:
// R9: ROUNDS
// R10: Index
-// V0: initialized to 0
-// V3: initialized to mask
// V4: IV
// V5: SRC
-// V6: IV perm mask
// V7: DST
-// V10: KEY perm mask
#define INP R3
#define OUT R4
#define IDX R10
#define RNDKEY0 V0
-#define RNDKEY1 V1
#define INOUT V2
#define TMP V3
#define IVEC V4
-#define INPTAIL V5
-#define INPPERM V6
-#define OUTHEAD V7
-#define OUTPERM V8
-#define OUTMASK V9
-#define KEYPERM V10
// Vector loads are done using LVX followed by
// a VPERM using mask generated from previous
MOVD enc+40(FP), ENC
MOVD nr+48(FP), ROUNDS
- CMPU LEN, $16 // cmpldi r5,16
- BC 14, 0, LR // bltlr-
- CMPW ENC, $0 // cmpwi r8,0
- MOVD $15, IDX // li r10,15
- VXOR RNDKEY0, RNDKEY0, RNDKEY0 // vxor v0,v0,v0
- VSPLTISB $0xf, TMP // vspltisb $0xf,v3
-
- LVX (IVP)(R0), IVEC // lvx v4,r0,r7
- LVSL (IVP)(R0), INPPERM // lvsl v6,r0,r7
- LVX (IVP)(IDX), INPTAIL // lvx v5,r10,r7
- VXOR INPPERM, TMP, INPPERM // vxor v3, v6, v6
- VPERM IVEC, INPTAIL, INPPERM, IVEC // vperm v4,v4,v5,v6
- NEG INP, R11 // neg r11,r3
- LVSR (KEY)(R0), KEYPERM // lvsr v10,r0,r6
- LVSR (R11)(R0), V6 // lvsr v6,r0,r11
- LVX (INP)(R0), INPTAIL // lvx v5,r0,r3
- ADD $15, INP // addi r3,r3,15
- VXOR INPPERM, TMP, INPPERM // vxor v6, v3, v6
- LVSL (OUT)(R0), OUTPERM // lvsl v8,r0,r4
- VSPLTISB $-1, OUTMASK // vspltisb v9,-1
- LVX (OUT)(R0), OUTHEAD // lvx v7,r0,r4
- VPERM OUTMASK, RNDKEY0, OUTPERM, OUTMASK // vperm v9,v9,v0,v8
- VXOR OUTPERM, TMP, OUTPERM // vxor v8, v3, v8
- SRW $1, ROUNDS // rlwinm r9,r9,31,1,31
-
- MOVD $16, IDX // li r10,16
+#ifdef GOARCH_ppc64le
+ MOVD $·rcon(SB), R11
+ LVX (R11), ESPERM // Permute value for P8_ macros.
+#endif
+
+ CMPU LEN, $16 // cmpldi r5,16
+ BC 14, 0, LR // bltlr-, return if len < 16.
+ CMPW ENC, $0 // cmpwi r8,0
+
+ P8_LXVB16X(IVP, R0, IVEC) // load ivec in BE register order
+
+ SRW $1, ROUNDS // rlwinm r9,r9,31,1,31
+ MOVD $0, IDX // li r10,0
ADD $-1, ROUNDS // addi r9,r9,-1
BEQ Lcbc_dec // beq
PCALIGN $16
// Outer loop: initialize encrypted value (INOUT)
// Load input (INPTAIL) ivec (IVEC)
Lcbc_enc:
- VOR INPTAIL, INPTAIL, INOUT // vor v2,v5,v5
- LVX (INP)(R0), INPTAIL // lvx v5,r0,r3
+ P8_LXVB16X(INP, R0, INOUT) // load text in BE vreg order
ADD $16, INP // addi r3,r3,16
MOVD ROUNDS, CTR // mtctr r9
ADD $-16, LEN // addi r5,r5,-16
- LVX (KEY)(R0), RNDKEY0 // lvx v0,r0,r6
- VPERM INOUT, INPTAIL, INPPERM, INOUT // vperm v2,v2,v5,v6
- LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6
+ P8_LXV(KEY, IDX, RNDKEY0) // load first xkey (IDX is 0 on entry to each block)
ADD $16, IDX // addi r10,r10,16
- VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10
VXOR INOUT, RNDKEY0, INOUT // vxor v2,v2,v0
- LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6
- ADD $16, IDX // addi r10,r10,16
VXOR INOUT, IVEC, INOUT // vxor v2,v2,v4
- // Encryption loop of INOUT using RNDKEY0 and RNDKEY1
+ // Encryption loop of INOUT: two VCIPHER rounds per iteration, each using a round key freshly loaded into RNDKEY0
Loop_cbc_enc:
- VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v1,v0,v10
- VCIPHER INOUT, RNDKEY1, INOUT // vcipher v2,v2,v1
- LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6
+ P8_LXV(KEY, IDX, RNDKEY0) // load next xkey
+ VCIPHER INOUT, RNDKEY0, INOUT // vcipher v2,v2,v0
ADD $16, IDX // addi r10,r10,16
- VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v0,v1,v10
- VCIPHER INOUT, RNDKEY0, INOUT // vcipher v2,v2,v0
- LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6
+ P8_LXV(KEY, IDX, RNDKEY0) // load next xkey
+ VCIPHER INOUT, RNDKEY0, INOUT // vcipher v2,v2,v0
ADD $16, IDX // addi r10,r10,16
- BC 16, 0, Loop_cbc_enc // bdnz Loop_cbc_enc
+ BDNZ Loop_cbc_enc // decrement CTR, loop while it is non-zero
// Encrypt tail values and store INOUT
- VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v1,v0,v10
- VCIPHER INOUT, RNDKEY1, INOUT // vcipher v2,v2,v1
- LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6
- MOVD $16, IDX // li r10,16
- VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v0,v1,v10
+ P8_LXV(KEY, IDX, RNDKEY0) // load next xkey
+ VCIPHER INOUT, RNDKEY0, INOUT // vcipher v2,v2,v0
+ ADD $16, IDX // addi r10,r10,16
+ P8_LXV(KEY, IDX, RNDKEY0) // load final xkey
VCIPHERLAST INOUT, RNDKEY0, IVEC // vcipherlast v4,v2,v0
+ MOVD $0, IDX // reset key index for next block
CMPU LEN, $16 // cmpldi r5,16
- VPERM IVEC, IVEC, OUTPERM, TMP // vperm v3,v4,v4,v8
- VSEL OUTHEAD, TMP, OUTMASK, INOUT // vsel v2,v7,v3,v9
- VOR TMP, TMP, OUTHEAD // vor v7,v3,v3
- STVX INOUT, (OUT)(R0) // stvx v2,r0,r4
+ P8_STXVB16X(IVEC, OUT, R0) // store ciphertext (left in IVEC by VCIPHERLAST) in BE order
ADD $16, OUT // addi r4,r4,16
BGE Lcbc_enc // bge Lcbc_enc
BR Lcbc_done // b Lcbc_done
// Outer loop: initialize decrypted value (INOUT)
// Load input (INPTAIL) ivec (IVEC)
Lcbc_dec:
- VOR INPTAIL, INPTAIL, TMP // vor v3,v5,v5
- LVX (INP)(R0), INPTAIL // lvx v5,r0,r3
+ P8_LXVB16X(INP, R0, TMP) // load ciphertext in BE vreg order; TMP is kept to become the next IVEC
ADD $16, INP // addi r3,r3,16
MOVD ROUNDS, CTR // mtctr r9
ADD $-16, LEN // addi r5,r5,-16
- LVX (KEY)(R0), RNDKEY0 // lvx v0,r0,r6
- VPERM TMP, INPTAIL, INPPERM, TMP // vperm v3,v3,v5,v6
- LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6
+ P8_LXV(KEY, IDX, RNDKEY0) // load first xkey (IDX is 0 on entry to each block)
ADD $16, IDX // addi r10,r10,16
- VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10
VXOR TMP, RNDKEY0, INOUT // vxor v2,v3,v0
- LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6
- ADD $16, IDX // addi r10,r10,16
PCALIGN $16
- // Decryption loop of INOUT using RNDKEY0 and RNDKEY1
+ // Decryption loop of INOUT: two VNCIPHER rounds per iteration, each using a round key freshly loaded into RNDKEY0
Loop_cbc_dec:
- VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v0,v1,v10
- VNCIPHER INOUT, RNDKEY1, INOUT // vncipher v2,v2,v1
- LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6
+ P8_LXV(KEY, IDX, RNDKEY0) // load next xkey
ADD $16, IDX // addi r10,r10,16
- VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10
- VNCIPHER INOUT, RNDKEY0, INOUT // vncipher v2,v2,v0
- LVX (KEY)(IDX), RNDKEY0 // lvx v0,r10,r6
+ VNCIPHER INOUT, RNDKEY0, INOUT // vncipher v2,v2,v0
+ P8_LXV(KEY, IDX, RNDKEY0) // load next xkey
ADD $16, IDX // addi r10,r10,16
- BC 16, 0, Loop_cbc_dec // bdnz
+ VNCIPHER INOUT, RNDKEY0, INOUT // vncipher v2,v2,v0
+ BDNZ Loop_cbc_dec // decrement CTR, loop while it is non-zero
// Decrypt tail values and store INOUT
- VPERM RNDKEY0, RNDKEY1, KEYPERM, RNDKEY1 // vperm v1,v0,v1,v10
- VNCIPHER INOUT, RNDKEY1, INOUT // vncipher v2,v2,v1
- LVX (KEY)(IDX), RNDKEY1 // lvx v1,r10,r6
- MOVD $16, IDX // li r10,16
- VPERM RNDKEY1, RNDKEY0, KEYPERM, RNDKEY0 // vperm v0,v1,v0,v10
+ P8_LXV(KEY, IDX, RNDKEY0) // load next xkey
+ ADD $16, IDX // addi r10,r10,16
+ VNCIPHER INOUT, RNDKEY0, INOUT // vncipher v2,v2,v0
+ P8_LXV(KEY, IDX, RNDKEY0) // load final xkey
+ MOVD $0, IDX // li r10,0; reset key index for next block
VNCIPHERLAST INOUT, RNDKEY0, INOUT // vncipherlast v2,v2,v0
CMPU LEN, $16 // cmpldi r5,16
VXOR INOUT, IVEC, INOUT // vxor v2,v2,v4
VOR TMP, TMP, IVEC // vor v4,v3,v3
- VPERM INOUT, INOUT, OUTPERM, TMP // vperm v3,v2,v2,v8
- VSEL OUTHEAD, TMP, OUTMASK, INOUT // vsel v2,v7,v3,v9
- VOR TMP, TMP, OUTHEAD // vor v7,v3,v3
- STVX INOUT, (OUT)(R0) // stvx v2,r0,r4
+ P8_STXVB16X(INOUT, OUT, R0) // store text in BE order
ADD $16, OUT // addi r4,r4,16
BGE Lcbc_dec // bge
Lcbc_done:
- ADD $-1, OUT // addi r4,r4,-1
- LVX (OUT)(R0), INOUT // lvx v2,r0,r4
- VSEL OUTHEAD, INOUT, OUTMASK, INOUT // vsel v2,v7,v2,v9
- STVX INOUT, (OUT)(R0) // stvx v2,r0,r4
- NEG IVP, ENC // neg r8,r7
- MOVD $15, IDX // li r10,15
- VXOR RNDKEY0, RNDKEY0, RNDKEY0 // vxor v0,v0,v0
- VSPLTISB $-1, OUTMASK // vspltisb v9,-1
- VSPLTISB $0xf, TMP // vspltisb v3, 0xf
- LVSR (ENC)(R0), OUTPERM // lvsl v8,r0,r8
- VPERM OUTMASK, RNDKEY0, OUTPERM, OUTMASK // vperm v9,v9,v0,v8
- VXOR OUTPERM, TMP, OUTPERM // vxor v9, v3, v9
- LVX (IVP)(R0), OUTHEAD // lvx v7,r0,r7
- VPERM IVEC, IVEC, OUTPERM, IVEC // vperm v4,v4,v4,v8
- VSEL OUTHEAD, IVEC, OUTMASK, INOUT // vsel v2,v7,v4,v9
- LVX (IVP)(IDX), INPTAIL // lvx v5,r10,r7
- STVX INOUT, (IVP)(R0) // stvx v2,r0,r7
- VSEL IVEC, INPTAIL, OUTMASK, INOUT // vsel v2,v4,v5,v9
- STVX INOUT, (IVP)(IDX) // stvx v2,r10,r7
+ VXOR RNDKEY0, RNDKEY0, RNDKEY0 // zero RNDKEY0 so the last round key does not remain in the register
+ P8_STXVB16X(IVEC, R0, IVP) // Save ivec in BE order for next round.
RET // bclr 20,lt,0