From fcabe15c7783f14d6997a89154a8754790c648ea Mon Sep 17 00:00:00 2001
From: Jens Wiklander
Date: Wed, 31 Jan 2024 10:51:46 +0100
Subject: [PATCH] core: crypto: fix internal AES-GCM counter implementation

We have several AES-GCM implementations: the ones in the crypto
libraries and an internal one. The internal implementation comes in two
flavours: one using the Arm crypto extensions (CFG_CRYPTO_WITH_CE=y) and
a pure software implementation.

Each block to be encrypted is XORed with an encrypted counter block of
equal size (16 bytes), and the counter is increased for each block.
Prior to this patch the entire counter block was increased as a 128-bit
integer, but that is not how AES-GCM is defined. In AES-GCM only the
least significant 32 bits of the counter block are increased, leaving
the rest untouched. The difference only becomes visible when the lower
32 bits have reached 0xffffffff and wrap to 0x00000000 on the next
increment: with a 128-bit integer the carry would propagate into other
parts of the block.

Fix this by only incrementing the last 32-bit word in the counter block,
both in the pure software implementation and when using the Arm crypto
extensions.

Link: https://github.com/OP-TEE/optee_os/issues/6659
Fixes: 1fca7e269b13 ("core: crypto: add new AES-GCM implementation")
Signed-off-by: Jens Wiklander
Acked-by: Jerome Forissier
Acked-by: Etienne Carriere
---
 core/arch/arm/crypto/ghash-ce-core_a64.S | 28 +++++++++++++-----------
 core/crypto/aes-gcm.c                    | 18 +++++++--------
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/core/arch/arm/crypto/ghash-ce-core_a64.S b/core/arch/arm/crypto/ghash-ce-core_a64.S
index dc86132e9e0..0abf91cec93 100644
--- a/core/arch/arm/crypto/ghash-ce-core_a64.S
+++ b/core/arch/arm/crypto/ghash-ce-core_a64.S
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: BSD-2-Clause */
 /*
- * Copyright (c) 2020 Linaro Limited
+ * Copyright (c) 2020, 2024 Linaro Limited
  * Copyright (C) 2014 - 2017 Linaro Ltd.
  *
  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
@@ -11,11 +11,10 @@
 #define CPU_LE(x...)	x
 
 /*
- * If the lower half of CTR is initialized with zeroes or a low value we
- * can expect that the upper half will remain unchanged. As an optimization
- * make the code to increase the upper half optional.
+ * Only increase the lowest quarter, that is, 32-bits of the counter. If
+ * it wraps it must not propagate into the upper bits.
  */
-#define INC_HALF_CTR	0
+#define INC_QUART_CTR	1
 
 	SHASH		.req	v0
 	SHASH2		.req	v1
@@ -412,7 +411,7 @@ END_FUNC pmull_ghash_update_p8
 	ld1		{SHASH.2d}, [x4], #16
 	ld1		{HH.2d}, [x4]
 	ld1		{XL.2d}, [x1]
-#if INC_HALF_CTR
+#if INC_QUART_CTR
 	ldr		x8, [x5, #8]		// load lower counter
 #else
 	ldp		x9, x8, [x5]		// load counter
@@ -422,7 +421,7 @@ END_FUNC pmull_ghash_update_p8
 	trn1		SHASH2.2d, SHASH.2d, HH.2d
 	trn2		T1.2d, SHASH.2d, HH.2d
 CPU_LE(	rev		x8, x8		)
-#if !INC_HALF_CTR
+#if !INC_QUART_CTR
 CPU_LE(	rev		x9, x9		)
 #endif
 	shl		MASK.2d, MASK.2d, #57
@@ -437,10 +436,13 @@ CPU_LE(	rev		x9, x9		)
 
 0:	ld1		{INP0.16b-INP1.16b}, [x3], #32
 
-#if INC_HALF_CTR
+#if INC_QUART_CTR
+	lsr		x12, x8, #32	// Save the upper 32 bits
 	rev		x9, x8
-	add		x11, x8, #1
-	add		x8, x8, #2
+	add		w11, w8, #1
+	add		w8, w8, #2
+	add		x11, x11, x12, lsl #32	// Restore the upper 32 bits
+	add		x8, x8, x12, lsl #32
 #endif
 
 	.if		\enc == 1
@@ -450,7 +452,7 @@ CPU_LE(	rev		x9, x9		)
 
 	sub		w0, w0, #2
 
-#if INC_HALF_CTR
+#if INC_QUART_CTR
 	ld1		{KS0.8b}, [x5]		// load upper counter
 	rev		x11, x11
 	mov		KS1.8b, KS0.8b
@@ -563,11 +565,11 @@ CPU_LE(	rev		x9, x9		)
 	cbnz		w0, 0b
 
 CPU_LE(	rev		x8, x8		)
-#if !INC_HALF_CTR
+#if !INC_QUART_CTR
 CPU_LE(	rev		x9, x9		)
 #endif
 	st1		{XL.2d}, [x1]
-#if INC_HALF_CTR
+#if INC_QUART_CTR
 	str		x8, [x5, #8]		// store lower counter
 #else
 	stp		x9, x8, [x5]		// store counter
diff --git a/core/crypto/aes-gcm.c b/core/crypto/aes-gcm.c
index 423b6f33278..c16c138dc3d 100644
--- a/core/crypto/aes-gcm.c
+++ b/core/crypto/aes-gcm.c
@@ -346,25 +346,23 @@ TEE_Result internal_aes_gcm_dec_final(struct internal_aes_gcm_ctx *ctx,
 void internal_aes_gcm_inc_ctr(struct internal_aes_gcm_state *state)
 {
 	uint64_t c = 0;
+	uint32_t lower = 0;
 
-	c = TEE_U64_FROM_BIG_ENDIAN(state->ctr[1]) + 1;
+	c = TEE_U64_FROM_BIG_ENDIAN(state->ctr[1]);
+	lower = c + 1;
+	c = (c & GENMASK_64(63, 32)) | lower;
 	state->ctr[1] = TEE_U64_TO_BIG_ENDIAN(c);
-	if (!c) {
-		c = TEE_U64_FROM_BIG_ENDIAN(state->ctr[0]) + 1;
-		state->ctr[0] = TEE_U64_TO_BIG_ENDIAN(c);
-	}
 }
 
 void internal_aes_gcm_dec_ctr(struct internal_aes_gcm_state *state)
 {
 	uint64_t c = 0;
+	uint32_t lower = 0;
 
-	c = TEE_U64_FROM_BIG_ENDIAN(state->ctr[1]) - 1;
+	c = TEE_U64_FROM_BIG_ENDIAN(state->ctr[1]);
+	lower = c - 1;
+	c = (c & GENMASK_64(63, 32)) | lower;
 	state->ctr[1] = TEE_U64_TO_BIG_ENDIAN(c);
-	if (c == UINT64_MAX) {
-		c = TEE_U64_FROM_BIG_ENDIAN(state->ctr[0]) - 1;
-		state->ctr[0] = TEE_U64_TO_BIG_ENDIAN(c);
-	}
 }
 
 TEE_Result internal_aes_gcm_enc(const struct internal_aes_gcm_key *enc_key,
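
For reference, the semantics the patch restores can be demonstrated outside the
OP-TEE tree with a small standalone C sketch. It is not part of the patch: the
helper names inc_ctr_32bit()/inc_ctr_128bit() and the raw 16-byte counter block
layout are illustrative assumptions, standing in for the code in aes-gcm.c based
on TEE_U64_FROM_BIG_ENDIAN() and GENMASK_64() shown above.

/* Illustration only: the GCM counter block is kept in big-endian byte order. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Correct AES-GCM behaviour: only the last 32 bits are incremented and wrap. */
static void inc_ctr_32bit(uint8_t blk[16])
{
	uint32_t c = ((uint32_t)blk[12] << 24) | ((uint32_t)blk[13] << 16) |
		     ((uint32_t)blk[14] << 8) | blk[15];

	c++;	/* wraps modulo 2^32 */
	blk[12] = c >> 24;
	blk[13] = c >> 16;
	blk[14] = c >> 8;
	blk[15] = c;
}

/* Old behaviour: increment as one 128-bit integer, so a carry out of the low
 * 32-bit word (bytes 12..15) spills into byte 11 and beyond. */
static void inc_ctr_128bit(uint8_t blk[16])
{
	int i;

	for (i = 15; i >= 0; i--)
		if (++blk[i])
			break;
}

int main(void)
{
	/* Lower 32 bits about to wrap: bytes 11..15 are 01 ff ff ff ff */
	uint8_t a[16] = { [11] = 0x01, [12] = 0xff, [13] = 0xff,
			  [14] = 0xff, [15] = 0xff };
	uint8_t b[16];
	int i;

	memcpy(b, a, sizeof(b));
	inc_ctr_32bit(a);	/* bytes 11..15 become 01 00 00 00 00 */
	inc_ctr_128bit(b);	/* bytes 11..15 become 02 00 00 00 00 */

	for (i = 11; i < 16; i++)
		printf("byte %d: 32-bit inc %02x, 128-bit inc %02x\n",
		       i, a[i], b[i]);

	return 0;
}

The two variants only diverge when the low 32-bit word wraps, which matches the
commit message: the difference is not noticeable until the counter reaches
0xffffffff, so the bug is rarely hit in practice.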