mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 04:04:43 +01:00
lib/crypto: arm64/nh: Migrate optimized code into library
Migrate the arm64 NEON implementation of NH into lib/crypto/. This makes the nh() function be optimized on arm64 kernels. Note: this temporarily makes the adiantum template not utilize the arm64 optimized NH code. This is resolved in a later commit that converts the adiantum template to use nh() instead of "nhpoly1305". Link: https://lore.kernel.org/r/20251211011846.8179-5-ebiggers@kernel.org Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
parent
29e39a11f5
commit
b4a8528d17
7 changed files with 37 additions and 94 deletions
|
|
@ -118,6 +118,7 @@ config CRYPTO_LIB_NH_ARCH
|
|||
bool
|
||||
depends on CRYPTO_LIB_NH && !UML
|
||||
default y if ARM && KERNEL_MODE_NEON
|
||||
default y if ARM64 && KERNEL_MODE_NEON
|
||||
|
||||
config CRYPTO_LIB_POLY1305
|
||||
tristate
|
||||
|
|
|
|||
|
|
@ -136,6 +136,7 @@ libnh-y := nh.o
|
|||
ifeq ($(CONFIG_CRYPTO_LIB_NH_ARCH),y)
|
||||
CFLAGS_nh.o += -I$(src)/$(SRCARCH)
|
||||
libnh-$(CONFIG_ARM) += arm/nh-neon-core.o
|
||||
libnh-$(CONFIG_ARM64) += arm64/nh-neon-core.o
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
|
|
|||
103
lib/crypto/arm64/nh-neon-core.S
Normal file
103
lib/crypto/arm64/nh-neon-core.S
Normal file
|
|
@ -0,0 +1,103 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* NH - ε-almost-universal hash function, ARM64 NEON accelerated version
|
||||
*
|
||||
* Copyright 2018 Google LLC
|
||||
*
|
||||
* Author: Eric Biggers <ebiggers@google.com>
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
// Symbolic names for the registers used by _nh_stride and nh_neon.
//
// x0-x3 hold the four arguments of nh_neon() in the order given by its
// prototype (key, message, message_len, hash).
KEY .req x0
|
||||
MESSAGE .req x1
|
||||
MESSAGE_LEN .req x2
|
||||
HASH .req x3
|
||||
|
||||
// One 2 x 64-bit accumulator per NH pass; zeroed at the start of nh_neon
// and reduced to a single 64-bit sum each at .Ldone.
PASS0_SUMS .req v0
|
||||
PASS1_SUMS .req v1
|
||||
PASS2_SUMS .req v2
|
||||
PASS3_SUMS .req v3
|
||||
// Four 16-byte key strides. Their roles rotate from one _nh_stride
// invocation to the next, so no register-to-register key moves are needed.
K0 .req v4
|
||||
K1 .req v5
|
||||
K2 .req v6
|
||||
K3 .req v7
|
||||
// Scratch registers: T0-T3 receive the (message + key) sums, T4-T7 receive
// a copy of the upper half of T0-T3 for the widening multiply.
T0 .req v8
|
||||
T1 .req v9
|
||||
T2 .req v10
|
||||
T3 .req v11
|
||||
T4 .req v12
|
||||
T5 .req v13
|
||||
T6 .req v14
|
||||
T7 .req v15
|
||||
|
||||
// _nh_stride k0, k1, k2, k3
//
// Process one 16-byte message stride for all four passes.
//
// \k0, \k1 and \k2 name vector registers that already hold key strides;
// \k3 names the register that this macro fills with the next 16 bytes of
// key. Both MESSAGE and KEY are advanced by 16 bytes (post-indexed loads).
// The macro clobbers T0-T7 and adds into PASS0_SUMS-PASS3_SUMS.
.macro _nh_stride k0, k1, k2, k3
|
||||
|
||||
// Load next message stride
|
||||
ld1 {T3.16b}, [MESSAGE], #16
|
||||
|
||||
// Load next key stride
|
||||
ld1 {\k3\().4s}, [KEY], #16
|
||||
|
||||
// Add message words to key words
// (four independent 32-bit lane additions per pass; T3 holds the message
// and is therefore overwritten last)
|
||||
add T0.4s, T3.4s, \k0\().4s
|
||||
add T1.4s, T3.4s, \k1\().4s
|
||||
add T2.4s, T3.4s, \k2\().4s
|
||||
add T3.4s, T3.4s, \k3\().4s
|
||||
|
||||
// Multiply 32x32 => 64 and accumulate
// The upper 64 bits of each sum are first copied into the low half of a
// second register, so that umlal (which reads the low two 32-bit lanes of
// both sources) multiplies lane 0 by lane 2 and lane 1 by lane 3.
|
||||
mov T4.d[0], T0.d[1]
|
||||
mov T5.d[0], T1.d[1]
|
||||
mov T6.d[0], T2.d[1]
|
||||
mov T7.d[0], T3.d[1]
|
||||
umlal PASS0_SUMS.2d, T0.2s, T4.2s
|
||||
umlal PASS1_SUMS.2d, T1.2s, T5.2s
|
||||
umlal PASS2_SUMS.2d, T2.2s, T6.2s
|
||||
umlal PASS3_SUMS.2d, T3.2s, T7.2s
|
||||
.endm
|
||||
|
||||
/*
|
||||
* void nh_neon(const u32 *key, const u8 *message, size_t message_len,
|
||||
* __le64 hash[NH_NUM_PASSES])
|
||||
*
|
||||
* It's guaranteed that message_len % 16 == 0.
|
||||
*/
|
||||
// nh_neon: compute the four NH pass sums over 'message' and store them to
// 'hash'.
//
// Flow: load the first three key strides, zero the four accumulators,
// consume the message 64 bytes (four strides) at a time, then handle up to
// three trailing 16-byte strides, and finally reduce and store the sums.
SYM_FUNC_START(nh_neon)
|
||||
|
||||
// Preload key strides K0-K2 (48 bytes); K3 is loaded by the first
// _nh_stride. The accumulators start at zero.
ld1 {K0.4s,K1.4s}, [KEY], #32
|
||||
movi PASS0_SUMS.2d, #0
|
||||
movi PASS1_SUMS.2d, #0
|
||||
ld1 {K2.4s}, [KEY], #16
|
||||
movi PASS2_SUMS.2d, #0
|
||||
movi PASS3_SUMS.2d, #0
|
||||
|
||||
// Bias the length by -64 so the loop condition is a simple sign test;
// skip the main loop entirely when fewer than 64 bytes are available.
subs MESSAGE_LEN, MESSAGE_LEN, #64
|
||||
blt .Lloop4_done
|
||||
// Main loop: four strides per iteration. The key register arguments are
// rotated by one position each time, so after four strides the roles are
// back where they started.
.Lloop4:
|
||||
_nh_stride K0, K1, K2, K3
|
||||
_nh_stride K1, K2, K3, K0
|
||||
_nh_stride K2, K3, K0, K1
|
||||
_nh_stride K3, K0, K1, K2
|
||||
subs MESSAGE_LEN, MESSAGE_LEN, #64
|
||||
bge .Lloop4
|
||||
|
||||
.Lloop4_done:
|
||||
// MESSAGE_LEN is now (bytes remaining - 64); masking with 63 recovers the
// number of bytes remaining, which is 0, 16, 32 or 48 given the documented
// guarantee that message_len % 16 == 0.
ands MESSAGE_LEN, MESSAGE_LEN, #63
|
||||
beq .Ldone
|
||||
_nh_stride K0, K1, K2, K3
|
||||
|
||||
subs MESSAGE_LEN, MESSAGE_LEN, #16
|
||||
beq .Ldone
|
||||
_nh_stride K1, K2, K3, K0
|
||||
|
||||
subs MESSAGE_LEN, MESSAGE_LEN, #16
|
||||
beq .Ldone
|
||||
_nh_stride K2, K3, K0, K1
|
||||
|
||||
.Ldone:
|
||||
// Sum the accumulators for each pass, then store the sums to 'hash'
// (addp adds the two 64-bit lanes of each accumulator, packing passes 0/1
// into T0 and passes 2/3 into T1; st1 then writes all 32 bytes)
|
||||
addp T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
|
||||
addp T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
|
||||
st1 {T0.16b,T1.16b}, [HASH]
|
||||
ret
|
||||
SYM_FUNC_END(nh_neon)
|
||||
34
lib/crypto/arm64/nh.h
Normal file
34
lib/crypto/arm64/nh.h
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* ARM64 accelerated implementation of NH
|
||||
*
|
||||
* Copyright 2018 Google LLC
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/simd.h>
|
||||
#include <linux/cpufeature.h>
|
||||
|
||||
/*
 * Static key that gates the NEON code path. It starts out false and is
 * switched on by nh_mod_init_arch() when the CPU reports ASIMD.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
|
||||
|
||||
/*
 * Assembly routine (see nh-neon-core.S). That file documents that
 * message_len is guaranteed to be a multiple of 16.
 */
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
|
||||
__le64 hash[NH_NUM_PASSES]);
|
||||
|
||||
static bool nh_arch(const u32 *key, const u8 *message, size_t message_len,
|
||||
__le64 hash[NH_NUM_PASSES])
|
||||
{
|
||||
if (static_branch_likely(&have_neon) && message_len >= 64 &&
|
||||
may_use_simd()) {
|
||||
scoped_ksimd()
|
||||
nh_neon(key, message, message_len, hash);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#define nh_mod_init_arch nh_mod_init_arch
|
||||
static void nh_mod_init_arch(void)
|
||||
{
|
||||
if (cpu_have_named_feature(ASIMD))
|
||||
static_branch_enable(&have_neon);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue