From 2ecf747e088fa6568621d719fbdf41c0d9221cc5 Mon Sep 17 00:00:00 2001
From: Lynn Boger <laboger@linux.vnet.ibm.com>
Date: Sat, 7 May 2022 06:43:03 -0500
Subject: [PATCH] crypto/aes: add BE support to gcm

This adds the code to allow the gcm assembler code to work on
big endian ppc64.

Updates #18499

Change-Id: Iab1ffc9b8af38a0605a91f2621dd9f4a9397b945
Reviewed-on: https://go-review.googlesource.com/c/go/+/404795
Reviewed-by: Heschi Kreinick <heschi@google.com>
Reviewed-by: Paul Murphy <murp@ibm.com>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
---
 .../aes/{gcm_ppc64le.go => gcm_ppc64x.go}     | 14 +++++--
 .../aes/{gcm_ppc64le.s => gcm_ppc64x.s}       | 39 ++++++++++++++++---
 2 files changed, 44 insertions(+), 9 deletions(-)
 rename src/crypto/aes/{gcm_ppc64le.go => gcm_ppc64x.go} (96%)
 rename src/crypto/aes/{gcm_ppc64le.s => gcm_ppc64x.s} (96%)

diff --git a/src/crypto/aes/gcm_ppc64le.go b/src/crypto/aes/gcm_ppc64x.go
similarity index 96%
rename from src/crypto/aes/gcm_ppc64le.go
rename to src/crypto/aes/gcm_ppc64x.go
index cba6c8873f..44b27056d6 100644
--- a/src/crypto/aes/gcm_ppc64le.go
+++ b/src/crypto/aes/gcm_ppc64x.go
@@ -2,7 +2,7 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-//go:build ppc64le
+//go:build ppc64le || ppc64
 
 package aes
 
@@ -11,6 +11,7 @@ import (
 	"crypto/subtle"
 	"encoding/binary"
 	"errors"
+	"runtime"
 )
 
 // This file implements GCM using an optimized GHASH function.
@@ -53,15 +54,22 @@ type gcmAsm struct {
 // NewGCM returns the AES cipher wrapped in Galois Counter Mode. This is only
 // called by crypto/cipher.NewGCM via the gcmAble interface.
 func (c *aesCipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) {
+	var h1, h2 uint64
 	g := &gcmAsm{cipher: c, ks: c.enc, nonceSize: nonceSize, tagSize: tagSize}
 
 	hle := make([]byte, gcmBlockSize)
+
 	c.Encrypt(hle, hle)
 
 	// Reverse the bytes in each 8 byte chunk
 	// Load little endian, store big endian
-	h1 := binary.LittleEndian.Uint64(hle[:8])
-	h2 := binary.LittleEndian.Uint64(hle[8:])
+	if runtime.GOARCH == "ppc64le" {
+		h1 = binary.LittleEndian.Uint64(hle[:8])
+		h2 = binary.LittleEndian.Uint64(hle[8:])
+	} else {
+		h1 = binary.BigEndian.Uint64(hle[:8])
+		h2 = binary.BigEndian.Uint64(hle[8:])
+	}
 	binary.BigEndian.PutUint64(hle[:8], h1)
 	binary.BigEndian.PutUint64(hle[8:], h2)
 	gcmInit(&g.productTable, hle)
diff --git a/src/crypto/aes/gcm_ppc64le.s b/src/crypto/aes/gcm_ppc64x.s
similarity index 96%
rename from src/crypto/aes/gcm_ppc64le.s
rename to src/crypto/aes/gcm_ppc64x.s
index 3945fc9ab3..72f0b8e01c 100644
--- a/src/crypto/aes/gcm_ppc64le.s
+++ b/src/crypto/aes/gcm_ppc64x.s
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+//go:build ppc64 || ppc64le
+
 // Based on CRYPTOGAMS code with the following comment:
 // # ====================================================================
 // # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@@ -220,15 +222,17 @@ TEXT Â·gcmHash(SB), NOSPLIT, $0-64
 
 	LXVD2X   (HTBL)(R8), VHL    // load pre-computed table
 	MOVD     $0x40, R8
-	LVSL     (R0)(R0), LEMASK
 	LXVD2X   (HTBL)(R9), VH
 	MOVD     $0x50, R9
-	VSPLTISB $0x07, T0
 	LXVD2X   (HTBL)(R10), VHH
 	MOVD     $0x60, R10
-	VXOR     LEMASK, T0, LEMASK
 	LXVD2X   (HTBL)(R0), VXC2
+#ifdef GOARCH_ppc64le
+	LVSL     (R0)(R0), LEMASK
+	VSPLTISB $0x07, T0
+	VXOR     LEMASK, T0, LEMASK
 	VPERM    XL, XL, LEMASK, XL
+#endif
 	VXOR     ZERO, ZERO, ZERO
 
 	CMPU LEN, $64
@@ -237,7 +241,9 @@ TEXT Â·gcmHash(SB), NOSPLIT, $0-64
 	LXVD2X (INP)(R0), VIN
 	ADD    $16, INP, INP
 	SUBCCC $16, LEN, LEN
+#ifdef GOARCH_ppc64le
 	VPERM  IN, IN, LEMASK, IN
+#endif
 	VXOR   IN, XL, IN
 	BEQ    short
 
@@ -249,7 +255,9 @@ TEXT Â·gcmHash(SB), NOSPLIT, $0-64
 
 loop_2x:
 	LXVD2X (INP)(R0), VIN1
+#ifdef GOARCH_ppc64le
 	VPERM  IN1, IN1, LEMASK, IN1
+#endif
 
 	SUBC    $32, LEN, LEN
 	VPMSUMD IN, H2L, XL   // H^2.loÂ·Xi.lo
@@ -280,7 +288,9 @@ loop_2x:
 
 	VSLDOI  $8, XL, XL, T1     // 2nd reduction phase
 	VPMSUMD XL, XC2, XL
+#ifdef GOARCH_ppc64le
 	VPERM   IN, IN, LEMASK, IN
+#endif
 	VXOR    T1, XH, T1
 	VXOR    IN, T1, IN
 	VXOR    IN, XL, IN
@@ -311,7 +321,9 @@ short:
 
 even:
 	VXOR    XL, T1, XL
+#ifdef GOARCH_ppc64le
 	VPERM   XL, XL, LEMASK, XL
+#endif
 	STXVD2X VXL, (XIP+R0)
 
 	OR R12, R12, R12 // write out Xi
@@ -349,10 +361,12 @@ gcm_ghash_p8_4x:
 	LXVD2X (INP)(R9), VIN2
 	LXVD2X (INP)(R10), VIN3
 	ADD    $0x40, INP, INP
+#ifdef GOARCH_ppc64le
 	VPERM  IN0, IN0, LEMASK, IN0
 	VPERM  IN1, IN1, LEMASK, IN1
 	VPERM  IN2, IN2, LEMASK, IN2
 	VPERM  IN3, IN3, LEMASK, IN3
+#endif
 
 	VXOR IN0, XL, XH
 
@@ -383,10 +397,12 @@ loop_4x:
 	LXVD2X (INP)(R9), VIN2
 	LXVD2X (INP)(R10), VIN3
 	ADD    $0x40, INP, INP
+#ifdef GOARCH_ppc64le
 	VPERM  IN1, IN1, LEMASK, IN1
 	VPERM  IN2, IN2, LEMASK, IN2
 	VPERM  IN3, IN3, LEMASK, IN3
 	VPERM  IN0, IN0, LEMASK, IN0
+#endif
 
 	VPMSUMD XH, H4L, XL   // H^4.loÂ·Xi.lo
 	VPMSUMD XH, H4, XM    // H^4.hiÂ·Xi.lo+H^4.loÂ·Xi.hi
@@ -463,9 +479,11 @@ tail_4x:
 
 three:
 	LXVD2X (INP)(R9), VIN2
+#ifdef GOARCH_ppc64le
 	VPERM  IN0, IN0, LEMASK, IN0
 	VPERM  IN1, IN1, LEMASK, IN1
 	VPERM  IN2, IN2, LEMASK, IN2
+#endif
 
 	VXOR IN0, XL, XH
 	VOR  H3L, H3L, H4L
@@ -483,8 +501,10 @@ three:
 	JMP  tail_4x
 
 two:
+#ifdef GOARCH_ppc64le
 	VPERM IN0, IN0, LEMASK, IN0
 	VPERM IN1, IN1, LEMASK, IN1
+#endif
 
 	VXOR  IN, XL, XH
 	VPERM ZERO, IN1, LOPERM, T0
@@ -501,7 +521,9 @@ two:
 	JMP tail_4x
 
 one:
+#ifdef GOARCH_ppc64le
 	VPERM IN0, IN0, LEMASK, IN0
+#endif
 
 	VSLDOI $8, ZERO, H, H4L
 	VOR    H, H, H4
@@ -515,7 +537,9 @@ one:
 	JMP tail_4x
 
 done_4x:
+#ifdef GOARCH_ppc64le
 	VPERM   XL, XL, LEMASK, XL
+#endif
 	STXVD2X VXL, (XIP+R0)      // write out Xi
 	RET
 
@@ -530,13 +554,14 @@ TEXT Â·gcmMul(SB), NOSPLIT, $0-32
 	LXVD2X (XIP)(R0), VIN // load Xi
 
 	LXVD2X   (HTBL)(R8), VHL    // Load pre-computed table
-	LVSL     (R0)(R0), LEMASK
 	LXVD2X   (HTBL)(R9), VH
-	VSPLTISB $0x07, T0
 	LXVD2X   (HTBL)(R10), VHH
-	VXOR     LEMASK, T0, LEMASK
 	LXVD2X   (HTBL)(R0), VXC2
+#ifdef GOARCH_ppc64le
+	VSPLTISB $0x07, T0
+	VXOR     LEMASK, T0, LEMASK
 	VPERM    IN, IN, LEMASK, IN
+#endif
 	VXOR     ZERO, ZERO, ZERO
 
 	VPMSUMD IN, HL, XL // H.loÂ·Xi.lo
@@ -558,6 +583,8 @@ TEXT Â·gcmMul(SB), NOSPLIT, $0-32
 	VXOR    T1, XH, T1
 	VXOR    XL, T1, XL
 
+#ifdef GOARCH_ppc64le
 	VPERM   XL, XL, LEMASK, XL
+#endif
 	STXVD2X VXL, (XIP+R0)      // write out Xi
 	RET
-- 
2.50.0