Introduce scoped ksimd API for ARM and arm64

Introduce a more strict replacement API for
 kernel_neon_begin()/kernel_neon_end() on both ARM and arm64, and replace
 occurrences of the latter pair appearing in lib/crypto
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQQm/3uucuRGn1Dmh0wbglWLn0tXAUCaRRKpgAKCRAwbglWLn0t
 XAKXAQD/L/XErOIGgSnvJnxG9sF+V2S+id1u9aoEJApbqMvW/gD9Fnvjqa7mRM7f
 jSZeDCMB++24SS2zL0/BFiRMmEl5/gc=
 =0IKE
 -----END PGP SIGNATURE-----

Merge tag 'scoped-ksimd-for-arm-arm64' into libcrypto-fpsimd-on-stack

Pull scoped ksimd API for ARM and arm64 from Ard Biesheuvel:

  "Introduce a more strict replacement API for
   kernel_neon_begin()/kernel_neon_end() on both ARM and arm64, and
   replace occurrences of the latter pair appearing in lib/crypto"

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
This commit is contained in:
Eric Biggers 2025-11-12 09:55:55 -08:00
commit 065f040010
13 changed files with 57 additions and 60 deletions

View File

@ -2,14 +2,21 @@
#ifndef _ASM_SIMD_H
#define _ASM_SIMD_H
#include <linux/cleanup.h>
#include <linux/compiler_attributes.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <asm/neon.h>
static __must_check inline bool may_use_simd(void)
{
	/*
	 * Kernel-mode NEON must be built in, and the NEON unit must not
	 * be touched from hard IRQ context or with interrupts disabled.
	 */
	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON))
		return false;

	return !in_hardirq() && !irqs_disabled();
}
/*
 * Scope guard pairing kernel_neon_begin() with kernel_neon_end(): the
 * guard (from <linux/cleanup.h>) calls kernel_neon_begin() when the
 * scope is entered and kernel_neon_end() when it is left, so the two
 * calls can never be mismatched.  Use as: scoped_ksimd() { ...NEON... }
 */
DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
#define scoped_ksimd() scoped_guard(ksimd)
#endif /* _ASM_SIMD_H */

View File

@ -6,12 +6,15 @@
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
#include <linux/cleanup.h>
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <asm/neon.h>
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@ -40,4 +43,8 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
/*
 * "ksimd" guard: wraps a code block in kernel_neon_begin() /
 * kernel_neon_end() via the lock-guard machinery in <linux/cleanup.h>,
 * guaranteeing the end call runs on every exit from the guarded scope.
 * Callers write: scoped_ksimd() { ...code that uses NEON/SIMD... }
 */
DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
#define scoped_ksimd() scoped_guard(ksimd)
#endif

View File

@ -12,7 +12,6 @@
#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
hchacha_block_arm(state, out, nrounds);
} else {
kernel_neon_begin();
hchacha_block_neon(state, out, nrounds);
kernel_neon_end();
scoped_ksimd()
hchacha_block_neon(state, out, nrounds);
}
}
@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
scoped_ksimd()
chacha_doneon(state, dst, src, todo, nrounds);
bytes -= todo;
src += todo;

View File

@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
const u8 point[CURVE25519_KEY_SIZE])
{
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
kernel_neon_begin();
curve25519_neon(out, scalar, point);
kernel_neon_end();
scoped_ksimd()
curve25519_neon(out, scalar, point);
} else {
curve25519_generic(out, scalar, point);
}

View File

@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
poly1305_blocks_neon(state, src, todo, padbit);
kernel_neon_end();
scoped_ksimd()
poly1305_blocks_neon(state, src, todo, padbit);
len -= todo;
src += todo;

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
kernel_neon_begin();
if (static_branch_likely(&have_ce))
sha1_ce_transform(state, data, nblocks);
else
sha1_transform_neon(state, data, nblocks);
kernel_neon_end();
scoped_ksimd() {
if (static_branch_likely(&have_ce))
sha1_ce_transform(state, data, nblocks);
else
sha1_transform_neon(state, data, nblocks);
}
} else {
sha1_block_data_order(state, data, nblocks);
}

View File

@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
kernel_neon_begin();
if (static_branch_likely(&have_ce))
sha256_ce_transform(state, data, nblocks);
else
sha256_block_data_order_neon(state, data, nblocks);
kernel_neon_end();
scoped_ksimd() {
if (static_branch_likely(&have_ce))
sha256_ce_transform(state, data, nblocks);
else
sha256_block_data_order_neon(state, data, nblocks);
}
} else {
sha256_block_data_order(state, data, nblocks);
}

View File

@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
kernel_neon_begin();
sha512_block_data_order_neon(state, data, nblocks);
kernel_neon_end();
scoped_ksimd()
sha512_block_data_order_neon(state, data, nblocks);
} else {
sha512_block_data_order(state, data, nblocks);
}

View File

@ -23,7 +23,6 @@
#include <linux/kernel.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
hchacha_block_generic(state, out, nrounds);
} else {
kernel_neon_begin();
hchacha_block_neon(state, out, nrounds);
kernel_neon_end();
scoped_ksimd()
hchacha_block_neon(state, out, nrounds);
}
}
@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
scoped_ksimd()
chacha_doneon(state, dst, src, todo, nrounds);
bytes -= todo;
src += todo;

View File

@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
poly1305_blocks_neon(state, src, todo, padbit);
kernel_neon_end();
scoped_ksimd()
poly1305_blocks_neon(state, src, todo, padbit);
len -= todo;
src += todo;

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state,
do {
size_t rem;
kernel_neon_begin();
rem = __sha1_ce_transform(state, data, nblocks);
kernel_neon_end();
scoped_ksimd()
rem = __sha1_ce_transform(state, data, nblocks);
data += (nblocks - rem) * SHA1_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state,
do {
size_t rem;
kernel_neon_begin();
rem = __sha256_ce_transform(state,
data, nblocks);
kernel_neon_end();
scoped_ksimd()
rem = __sha256_ce_transform(state, data,
nblocks);
data += (nblocks - rem) * SHA256_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
} else {
kernel_neon_begin();
sha256_block_neon(state, data, nblocks);
kernel_neon_end();
scoped_ksimd()
sha256_block_neon(state, data, nblocks);
}
} else {
sha256_block_data_order(state, data, nblocks);
@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
len <= 65536 && likely(may_use_simd())) {
kernel_neon_begin();
sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
kernel_neon_end();
scoped_ksimd()
sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
return true;

View File

@ -4,7 +4,7 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state,
do {
size_t rem;
kernel_neon_begin();
rem = __sha512_ce_transform(state, data, nblocks);
kernel_neon_end();
scoped_ksimd()
rem = __sha512_ce_transform(state, data, nblocks);
data += (nblocks - rem) * SHA512_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);