crypto: x86/aes-gcm - Use new AES library API

Switch from the old AES library functions (which use struct
crypto_aes_ctx) to the new ones (which use struct aes_enckey).  This
eliminates the unnecessary computation and caching of the decryption
round keys.  The new AES en/decryption functions are also much faster
and use AES instructions when supported by the CPU.

Since this changes the format of the AES-GCM key structures that are
used by the AES-GCM assembly code, the offsets in the assembly code had
to be updated to match.  Note that the new key structures are smaller,
since the decryption round keys are no longer unnecessarily included.
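
For reference, the layout change that drives the new offsets looks roughly like this (a sketch: struct crypto_aes_ctx is as in include/crypto/aes.h; the aes_enckey field names follow the static_asserts in the diff below, while the exact field types and padding are assumptions):

/* Old: both key schedules cached; AES_MAX_KEYLENGTH_U32 == 60 */
struct crypto_aes_ctx {
	u32 key_enc[60];	/* bytes 0..239: encryption round keys */
	u32 key_dec[60];	/* bytes 240..479: decryption round keys */
	u32 key_length;		/* byte 480: the old OFFSETOF_AESKEYLEN */
};

/* New: encryption schedule only (sketch) */
struct aes_enckey {
	u32 len;		/* byte 0: the new OFFSETOF_AESKEYLEN */
	/* padding so that the round keys are 16-byte aligned */
	struct {
		u8 rndkeys[15 * 16];	/* byte 16: OFFSETOF_AESROUNDKEYS */
	} k;
};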

Acked-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20260112192035.10427-26-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
commit 104a9526e1
parent 370960c153
Author: Eric Biggers <ebiggers@kernel.org>
Date:   2026-01-12 11:20:23 -08:00

4 changed files with 67 additions and 69 deletions


@@ -143,10 +143,11 @@
 .octa 0
 // Offsets in struct aes_gcm_key_aesni
-#define OFFSETOF_AESKEYLEN 480
-#define OFFSETOF_H_POWERS 496
-#define OFFSETOF_H_POWERS_XORED 624
-#define OFFSETOF_H_TIMES_X64 688
+#define OFFSETOF_AESKEYLEN 0
+#define OFFSETOF_AESROUNDKEYS 16
+#define OFFSETOF_H_POWERS 272
+#define OFFSETOF_H_POWERS_XORED 400
+#define OFFSETOF_H_TIMES_X64 464
 .text
@@ -505,9 +506,9 @@
 // Encrypt an all-zeroes block to get the raw hash subkey.
 movl OFFSETOF_AESKEYLEN(KEY), %eax
-lea 6*16(KEY,%rax,4), RNDKEYLAST_PTR
-movdqa (KEY), H_POW1 // Zero-th round key XOR all-zeroes block
-lea 16(KEY), %rax
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,%rax,4), RNDKEYLAST_PTR
+movdqa OFFSETOF_AESROUNDKEYS(KEY), H_POW1
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 aesenc (%rax), H_POW1
 add $16, %rax
@@ -624,7 +625,7 @@
 // the zero-th AES round key. Clobbers TMP0 and TMP1.
 .macro _ctr_begin_8x
 movq .Lone(%rip), TMP0
-movdqa (KEY), TMP1 // zero-th round key
+movdqa OFFSETOF_AESROUNDKEYS(KEY), TMP1 // zero-th round key
 .irp i, 0,1,2,3,4,5,6,7
 _vpshufb BSWAP_MASK, LE_CTR, AESDATA\i
 pxor TMP1, AESDATA\i
@@ -726,7 +727,7 @@
 movdqu (LE_CTR_PTR), LE_CTR
 movl OFFSETOF_AESKEYLEN(KEY), AESKEYLEN
-lea 6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
 // If there are at least 8*16 bytes of data, then continue into the main
 // loop, which processes 8*16 bytes of data per iteration.
@@ -745,7 +746,7 @@
 .if \enc
 // Encrypt the first 8 plaintext blocks.
 _ctr_begin_8x
-lea 16(KEY), %rsi
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rsi
 .p2align 4
 1:
 movdqa (%rsi), TMP0
@@ -767,7 +768,7 @@
 // Generate the next set of 8 counter blocks and start encrypting them.
 _ctr_begin_8x
-lea 16(KEY), %rsi
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rsi
 // Do a round of AES, and start the GHASH update of 8 ciphertext blocks
 // by doing the unreduced multiplication for the first ciphertext block.
@@ -869,7 +870,7 @@
 // Encrypt the next counter block.
 _vpshufb BSWAP_MASK, LE_CTR, TMP0
 paddd ONE, LE_CTR
-pxor (KEY), TMP0
+pxor OFFSETOF_AESROUNDKEYS(KEY), TMP0
 lea -6*16(RNDKEYLAST_PTR), %rsi // Reduce code size
 cmp $24, AESKEYLEN
 jl 128f // AES-128?
@@ -926,8 +927,8 @@
 // Encrypt a counter block for the last time.
 pshufb BSWAP_MASK, LE_CTR
-pxor (KEY), LE_CTR
-lea 16(KEY), %rsi
+pxor OFFSETOF_AESROUNDKEYS(KEY), LE_CTR
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rsi
 1:
 aesenc (%rsi), LE_CTR
 add $16, %rsi
@@ -1038,12 +1039,12 @@
 // Make %rax point to the 6th from last AES round key. (Using signed
 // byte offsets -7*16 through 6*16 decreases code size.)
-lea (KEY,AESKEYLEN64,4), %rax
+lea OFFSETOF_AESROUNDKEYS(KEY,AESKEYLEN64,4), %rax
 // AES-encrypt the counter block and also multiply GHASH_ACC by H^1.
 // Interleave the AES and GHASH instructions to improve performance.
 pshufb BSWAP_MASK, %xmm0
-pxor (KEY), %xmm0
+pxor OFFSETOF_AESROUNDKEYS(KEY), %xmm0
 cmp $24, AESKEYLEN
 jl 128f // AES-128?
 je 192f // AES-192?
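
A note on the recurring address arithmetic above (an editorial sketch, not part of the patch): AES-128/192/256 use keylen/4 + 6 rounds, so the last round key sits at byte offset 16*(keylen/4 + 6) = 4*keylen + 6*16 from the start of the round keys, which is exactly what lea OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4) computes. The same computation in C:

/* Offset of the last AES round key relative to KEY (illustrative).
 * keylen 16 -> 16+160 (round key 10), 24 -> 16+192 (round key 12),
 * 32 -> 16+224 (round key 14). */
static inline unsigned int last_rndkey_offset(unsigned int keylen)
{
	return 16 /* OFFSETOF_AESROUNDKEYS */ + 6 * 16 + 4 * keylen;
}

Likewise, movdqa OFFSETOF_AESROUNDKEYS(KEY), H_POW1 in the precompute path works because XOR-ing the all-zeroes block with the zero-th round key yields the round key itself, so the plain load alone completes AES round 0.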


@@ -122,8 +122,9 @@
 .octa 2
 // Offsets in struct aes_gcm_key_vaes_avx2
-#define OFFSETOF_AESKEYLEN 480
-#define OFFSETOF_H_POWERS 512
+#define OFFSETOF_AESKEYLEN 0
+#define OFFSETOF_AESROUNDKEYS 16
+#define OFFSETOF_H_POWERS 288
 #define NUM_H_POWERS 8
 #define OFFSETOFEND_H_POWERS (OFFSETOF_H_POWERS + (NUM_H_POWERS * 16))
 #define OFFSETOF_H_POWERS_XORED OFFSETOFEND_H_POWERS
@@ -240,9 +241,9 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx2)
 // Encrypt an all-zeroes block to get the raw hash subkey.
 movl OFFSETOF_AESKEYLEN(KEY), %eax
-lea 6*16(KEY,%rax,4), RNDKEYLAST_PTR
-vmovdqu (KEY), H_CUR_XMM // Zero-th round key XOR all-zeroes block
-lea 16(KEY), %rax
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,%rax,4), RNDKEYLAST_PTR
+vmovdqu OFFSETOF_AESROUNDKEYS(KEY), H_CUR_XMM
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 vaesenc (%rax), H_CUR_XMM, H_CUR_XMM
 add $16, %rax
@@ -635,7 +636,7 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 // the last AES round. Clobbers %rax and TMP0.
 .macro _aesenc_loop vecs:vararg
 _ctr_begin \vecs
-lea 16(KEY), %rax
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 .Laesenc_loop\@:
 vbroadcasti128 (%rax), TMP0
 _vaesenc TMP0, \vecs
@@ -768,8 +769,8 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 // Make RNDKEYLAST_PTR point to the last AES round key. This is the
 // round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256
 // respectively. Then load the zero-th and last round keys.
-lea 6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
-vbroadcasti128 (KEY), RNDKEY0
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
+vbroadcasti128 OFFSETOF_AESROUNDKEYS(KEY), RNDKEY0
 vbroadcasti128 (RNDKEYLAST_PTR), RNDKEYLAST
 // Finish initializing LE_CTR by adding 1 to the second block.
@@ -1069,12 +1070,12 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx2)
 .endif
 // Make %rax point to the last AES round key for the chosen AES variant.
-lea 6*16(KEY,AESKEYLEN64,4), %rax
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), %rax
 // Start the AES encryption of the counter block by swapping the counter
 // block to big-endian and XOR-ing it with the zero-th AES round key.
 vpshufb BSWAP_MASK, LE_CTR, %xmm0
-vpxor (KEY), %xmm0, %xmm0
+vpxor OFFSETOF_AESROUNDKEYS(KEY), %xmm0, %xmm0
 // Complete the AES encryption and multiply GHASH_ACC by H^1.
 // Interleave the AES and GHASH instructions to improve performance.
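
The 1: loops and _aesenc_loop above all follow the same round-key walk. A self-contained C model of that walk using the AES-NI intrinsics (an illustrative sketch; key here is a raw pointer to the key struct, with the offsets defined above):

#include <wmmintrin.h>	/* AES-NI intrinsics; compile with -maes */

static __m128i model_aes_encrypt(const unsigned char *key,
				 unsigned int keylen, __m128i block)
{
	const unsigned char *rk = key + 16;	/* OFFSETOF_AESROUNDKEYS */
	const unsigned char *rk_last = rk + 6 * 16 + 4 * keylen;

	/* AES round 0 is just an XOR with the zero-th round key. */
	block = _mm_xor_si128(block, _mm_loadu_si128((const __m128i *)rk));
	for (rk += 16; rk < rk_last; rk += 16)	/* the "1:" loop */
		block = _mm_aesenc_si128(block,
					 _mm_loadu_si128((const __m128i *)rk));
	/* The final round uses aesenclast (outside the shown hunks). */
	return _mm_aesenclast_si128(block,
				    _mm_loadu_si128((const __m128i *)rk_last));
}

This also shows why every round-key reference in the diff gained the OFFSETOF_AESROUNDKEYS displacement: the walk now starts 16 bytes into the struct instead of at offset 0.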


@@ -86,10 +86,13 @@
 #define NUM_H_POWERS 16
 // Offset to AES key length (in bytes) in the key struct
-#define OFFSETOF_AESKEYLEN 480
+#define OFFSETOF_AESKEYLEN 0
+// Offset to AES round keys in the key struct
+#define OFFSETOF_AESROUNDKEYS 16
 // Offset to start of hash key powers array in the key struct
-#define OFFSETOF_H_POWERS 512
+#define OFFSETOF_H_POWERS 320
 // Offset to end of hash key powers array in the key struct.
 //
@@ -301,9 +304,9 @@ SYM_FUNC_START(aes_gcm_precompute_vaes_avx512)
 // Encrypt an all-zeroes block to get the raw hash subkey.
 movl OFFSETOF_AESKEYLEN(KEY), %eax
-lea 6*16(KEY,%rax,4), RNDKEYLAST_PTR
-vmovdqu (KEY), %xmm0 // Zero-th round key XOR all-zeroes block
-add $16, KEY
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,%rax,4), RNDKEYLAST_PTR
+vmovdqu OFFSETOF_AESROUNDKEYS(KEY), %xmm0
+add $OFFSETOF_AESROUNDKEYS+16, KEY
 1:
 vaesenc (KEY), %xmm0, %xmm0
 add $16, KEY
@@ -790,8 +793,8 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 // Make RNDKEYLAST_PTR point to the last AES round key. This is the
 // round key with index 10, 12, or 14 for AES-128, AES-192, or AES-256
 // respectively. Then load the zero-th and last round keys.
-lea 6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
-vbroadcasti32x4 (KEY), RNDKEY0
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), RNDKEYLAST_PTR
+vbroadcasti32x4 OFFSETOF_AESROUNDKEYS(KEY), RNDKEY0
 vbroadcasti32x4 (RNDKEYLAST_PTR), RNDKEYLAST
 // Finish initializing LE_CTR by adding [0, 1, ...] to its low words.
@@ -834,7 +837,7 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 // Encrypt the first 4 vectors of plaintext blocks. Leave the resulting
 // ciphertext in GHASHDATA[0-3] for GHASH.
 _ctr_begin_4x
-lea 16(KEY), %rax
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 vbroadcasti32x4 (%rax), RNDKEY
 _vaesenc_4x RNDKEY
@@ -957,7 +960,7 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 vpshufb BSWAP_MASK, LE_CTR, %zmm0
 vpaddd LE_CTR_INC, LE_CTR, LE_CTR
 vpxord RNDKEY0, %zmm0, %zmm0
-lea 16(KEY), %rax
+lea OFFSETOF_AESROUNDKEYS+16(KEY), %rax
 1:
 vbroadcasti32x4 (%rax), RNDKEY
 vaesenc RNDKEY, %zmm0, %zmm0
@@ -1087,12 +1090,12 @@ SYM_FUNC_END(aes_gcm_aad_update_vaes_avx512)
 .endif
 // Make %rax point to the last AES round key for the chosen AES variant.
-lea 6*16(KEY,AESKEYLEN64,4), %rax
+lea OFFSETOF_AESROUNDKEYS+6*16(KEY,AESKEYLEN64,4), %rax
 // Start the AES encryption of the counter block by swapping the counter
 // block to big-endian and XOR-ing it with the zero-th AES round key.
 vpshufb BSWAP_MASK, LE_CTR, %xmm0
-vpxor (KEY), %xmm0, %xmm0
+vpxor OFFSETOF_AESROUNDKEYS(KEY), %xmm0, %xmm0
 // Complete the AES encryption and multiply GHASH_ACC by H^1.
 // Interleave the AES and GHASH instructions to improve performance.


@@ -780,7 +780,7 @@ DEFINE_AVX_SKCIPHER_ALGS(vaes_avx512, "vaes-avx512", 800);
 /* The common part of the x86_64 AES-GCM key struct */
 struct aes_gcm_key {
 	/* Expanded AES key and the AES key length in bytes */
-	struct crypto_aes_ctx aes_key;
+	struct aes_enckey aes_key;
 	/* RFC4106 nonce (used only by the rfc4106 algorithms) */
 	u32 rfc4106_nonce;
@@ -789,11 +789,10 @@ struct aes_gcm_key {
 /* Key struct used by the AES-NI implementations of AES-GCM */
 struct aes_gcm_key_aesni {
 	/*
-	 * Common part of the key. The assembly code requires 16-byte alignment
-	 * for the round keys; we get this by them being located at the start of
-	 * the struct and the whole struct being 16-byte aligned.
+	 * Common part of the key. 16-byte alignment is required by the
+	 * assembly code.
 	 */
-	struct aes_gcm_key base;
+	struct aes_gcm_key base __aligned(16);
 	/*
 	 * Powers of the hash key H^8 through H^1. These are 128-bit values.
@@ -824,10 +823,9 @@ struct aes_gcm_key_aesni {
 struct aes_gcm_key_vaes_avx2 {
 	/*
 	 * Common part of the key. The assembly code prefers 16-byte alignment
-	 * for the round keys; we get this by them being located at the start of
-	 * the struct and the whole struct being 32-byte aligned.
+	 * for this.
 	 */
-	struct aes_gcm_key base;
+	struct aes_gcm_key base __aligned(16);
 	/*
 	 * Powers of the hash key H^8 through H^1. These are 128-bit values.
@@ -854,10 +852,9 @@ struct aes_gcm_key_vaes_avx2 {
 struct aes_gcm_key_vaes_avx512 {
 	/*
 	 * Common part of the key. The assembly code prefers 16-byte alignment
-	 * for the round keys; we get this by them being located at the start of
-	 * the struct and the whole struct being 64-byte aligned.
+	 * for this.
 	 */
-	struct aes_gcm_key base;
+	struct aes_gcm_key base __aligned(16);
 	/*
 	 * Powers of the hash key H^16 through H^1. These are 128-bit values.
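
Since base no longer begins with the round keys, the alignment guarantee is now explicit rather than positional. A layout sanity check in the same spirit as the asserts below (illustrative only; offsets as established by this patch):

/* rndkeys at offset 16 within a 16-byte-aligned struct is itself
 * 16-byte aligned, which is what the assembly code relies on. */
static_assert(__alignof__(struct aes_gcm_key_aesni) % 16 == 0);
static_assert(offsetof(struct aes_gcm_key_aesni, base.aes_key.k.rndkeys) % 16 == 0);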
@@ -1182,26 +1179,26 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
 }
 /* The assembly code assumes the following offsets. */
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_enc) != 0);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, base.aes_key.key_length) != 480);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers) != 496);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_powers_xored) != 624);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_aesni, h_times_x64) != 688);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_enc) != 0);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.key_length) != 480);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) != 512);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) != 640);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_enc) != 0);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.key_length) != 480);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) != 512);
-BUILD_BUG_ON(offsetof(struct aes_gcm_key_vaes_avx512, padding) != 768);
+static_assert(offsetof(struct aes_gcm_key_aesni, base.aes_key.len) == 0);
+static_assert(offsetof(struct aes_gcm_key_aesni, base.aes_key.k.rndkeys) == 16);
+static_assert(offsetof(struct aes_gcm_key_aesni, h_powers) == 272);
+static_assert(offsetof(struct aes_gcm_key_aesni, h_powers_xored) == 400);
+static_assert(offsetof(struct aes_gcm_key_aesni, h_times_x64) == 464);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.len) == 0);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx2, base.aes_key.k.rndkeys) == 16);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx2, h_powers) == 288);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx2, h_powers_xored) == 416);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.len) == 0);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx512, base.aes_key.k.rndkeys) == 16);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx512, h_powers) == 320);
+static_assert(offsetof(struct aes_gcm_key_vaes_avx512, padding) == 576);
+err = aes_prepareenckey(&key->aes_key, raw_key, keylen);
+if (err)
+return err;
 if (likely(crypto_simd_usable())) {
-err = aes_check_keylen(keylen);
-if (err)
-return err;
 kernel_fpu_begin();
-aesni_set_key(&key->aes_key, raw_key, keylen);
 aes_gcm_precompute(key, flags);
 kernel_fpu_end();
 } else {
@@ -1215,10 +1212,6 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *raw_key,
 be128 h;
 int i;
-err = aes_expandkey(&key->aes_key, raw_key, keylen);
-if (err)
-return err;
 /* Encrypt the all-zeroes block to get the hash key H^1 */
 aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);
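
Put together, the resulting setkey path condenses to the following (a sketch assembled from the hunks above, not verbatim; error labels and the C fallback's computation of the remaining H powers are elided):

static int gcm_setkey_sketch(struct aes_gcm_key *key, const u8 *raw_key,
			     unsigned int keylen, int flags)
{
	int err;

	/* Expands only the encryption round keys and validates keylen,
	 * replacing the separate aes_check_keylen()/aesni_set_key() and
	 * aes_expandkey() calls. */
	err = aes_prepareenckey(&key->aes_key, raw_key, keylen);
	if (err)
		return err;

	if (likely(crypto_simd_usable())) {
		kernel_fpu_begin();
		aes_gcm_precompute(key, flags);	/* asm derives the H powers */
		kernel_fpu_end();
	} else {
		be128 h1 = {};

		/* Encrypt the all-zeroes block to get the hash key H^1. */
		aes_encrypt(&key->aes_key, (u8 *)&h1, (u8 *)&h1);
		/* ... derive the remaining H powers in C ... */
	}
	return 0;
}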