// For P9 instruction emulation
#define ESPERM V21 // Endian swapping permute into BE
-#define TMP2 V22 // Temporary for P8_STXVB16X/P8_STXV
+#define TMP2 V22 // Temporary for the P8_STXVB16X emulation
// For {en,de}cryptBlockAsm
#define BLK_INP R3
DATA ·rcon+0x48(SB)/8, $0x0000000000000000
GLOBL ·rcon(SB), RODATA, $80
-// Emulate unaligned BE vector load/stores on LE targets
#ifdef GOARCH_ppc64le
+# ifdef GOPPC64_power9
+#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT
+#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
+#define XXBRD_ON_LE(VA,VT) XXBRD VA, VT
+# else
+// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
+// doublewords and byte-swapping each doubleword to emulate BE load/stores.
+#define NEEDS_ESPERM
#define P8_LXVB16X(RA,RB,VT) \
LXVD2X (RA+RB), VT \
VPERM VT, VT, ESPERM, VT
VPERM VS, VS, ESPERM, TMP2 \
STXVD2X TMP2, (RA+RB)
-#define LXSDX_BE(RA,RB,VT) \
- LXSDX (RA+RB), VT \
- VPERM VT, VT, ESPERM, VT
-#else
-#define P8_LXVB16X(RA,RB,VT) \
- LXVD2X (RA+RB), VT
+#define XXBRD_ON_LE(VA,VT) \
+ VPERM VA, VA, ESPERM, VT
-#define P8_STXVB16X(VS,RA,RB) \
- STXVD2X VS, (RA+RB)
-
-#define LXSDX_BE(RA,RB,VT) \
- LXSDX (RA+RB), VT
-#endif
+# endif // defined(GOPPC64_power9)
+#else
+#define P8_LXVB16X(RA,RB,VT) LXVD2X (RA+RB), VT
+#define P8_STXVB16X(VS,RA,RB) STXVD2X VS, (RA+RB)
+#define XXBRD_ON_LE(VA, VT)
+#endif // defined(GOARCH_ppc64le)
// func expandKeyAsm(nr int, key *byte, enc *uint32, dec *uint32)
TEXT ·expandKeyAsm(SB), NOSPLIT|NOFRAME, $0
MOVD enc+16(FP), OUTENC
MOVD dec+24(FP), OUTDEC
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), PTR // PTR points to rcon addr
LVX (PTR), ESPERM
ADD $0x10, PTR
RET
l192:
- LXSDX_BE(INP, R0, IN1) // Load next 8 bytes into upper half of VSR in BE order.
+ LXSDX (INP+R0), IN1 // Load next 8 bytes into upper half of VSR.
+ XXBRD_ON_LE(IN1, IN1) // and convert to BE ordering on LE hosts.
MOVD $4, CNT // li 7,4
STXVD2X IN0, (R0+OUTENC)
STXVD2X IN0, (R0+OUTDEC)
MOVD xk+8(FP), R5 // Key pointer
MOVD dst+16(FP), R3 // Dest pointer
MOVD src+24(FP), R4 // Src pointer
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R7
LVX (R7), ESPERM // Permute value for P8_ macros.
#endif
MOVD xk+8(FP), R5 // Key pointer
MOVD dst+16(FP), R3 // Dest pointer
MOVD src+24(FP), R4 // Src pointer
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R7
LVX (R7), ESPERM // Permute value for P8_ macros.
#endif
MOVD enc+40(FP), ENC
MOVD nr+48(FP), ROUNDS
-#ifdef GOARCH_ppc64le
+#ifdef NEEDS_ESPERM
MOVD $·rcon(SB), R11
LVX (R11), ESPERM // Permute value for P8_ macros.
#endif