Introduce scoped ksimd API for ARM and arm64

Introduce a more strict replacement API for
 kernel_neon_begin()/kernel_neon_end() on both ARM and arm64, and replace
 occurrences of the latter pair appearing in lib/crypto
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQQm/3uucuRGn1Dmh0wbglWLn0tXAUCaRRKpgAKCRAwbglWLn0t
 XAKXAQD/L/XErOIGgSnvJnxG9sF+V2S+id1u9aoEJApbqMvW/gD9Fnvjqa7mRM7f
 jSZeDCMB++24SS2zL0/BFiRMmEl5/gc=
 =0IKE
 -----END PGP SIGNATURE-----

Merge tag 'scoped-ksimd-for-arm-arm64' into libcrypto-fpsimd-on-stack

Pull scoped ksimd API for ARM and arm64 from Ard Biesheuvel:

  "Introduce a more strict replacement API for
   kernel_neon_begin()/kernel_neon_end() on both ARM and arm64, and
   replace occurrences of the latter pair appearing in lib/crypto"

Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Eric Biggers 2025-11-12 09:55:55 -08:00
commit 065f040010
13 changed files with 57 additions and 60 deletions
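For orientation, here is a minimal sketch of what the conversion looks like at a call site (the chacha_doneon() call is taken from the diffs below; the surrounding loop is elided). The guard is built on the linux/cleanup.h scoped-guard machinery, so kernel-mode NEON is ended automatically when control leaves the scope:

	/* Old API: the caller must pair begin/end manually */
	kernel_neon_begin();
	chacha_doneon(state, dst, src, todo, nrounds);
	kernel_neon_end();

	/*
	 * New API: NEON is usable only inside the statement or block that
	 * follows scoped_ksimd(); kernel_neon_end() runs on scope exit.
	 */
	scoped_ksimd()
		chacha_doneon(state, dst, src, todo, nrounds);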

@@ -2,14 +2,21 @@
 #ifndef _ASM_SIMD_H
 #define _ASM_SIMD_H

+#include <linux/cleanup.h>
 #include <linux/compiler_attributes.h>
 #include <linux/preempt.h>
 #include <linux/types.h>

+#include <asm/neon.h>
+
 static __must_check inline bool may_use_simd(void)
 {
 	return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq()
 	       && !irqs_disabled();
 }

+DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+
+#define scoped_ksimd() scoped_guard(ksimd)
+
 #endif /* _ASM_SIMD_H */

@@ -6,12 +6,15 @@
 #ifndef __ASM_SIMD_H
 #define __ASM_SIMD_H

+#include <linux/cleanup.h>
 #include <linux/compiler.h>
 #include <linux/irqflags.h>
 #include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/types.h>

+#include <asm/neon.h>
+
 #ifdef CONFIG_KERNEL_MODE_NEON

 /*
@@ -40,4 +43,8 @@ static __must_check inline bool may_use_simd(void)

 #endif /* ! CONFIG_KERNEL_MODE_NEON */

+DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+
+#define scoped_ksimd() scoped_guard(ksimd)
+
 #endif

@@ -12,7 +12,6 @@
 #include <asm/cputype.h>
 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>

 asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
 		hchacha_block_arm(state, out, nrounds);
 	} else {
-		kernel_neon_begin();
-		hchacha_block_neon(state, out, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			hchacha_block_neon(state, out, nrounds);
 	}
 }
@@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
 	do {
 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

-		kernel_neon_begin();
-		chacha_doneon(state, dst, src, todo, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			chacha_doneon(state, dst, src, todo, nrounds);

 		bytes -= todo;
 		src += todo;

@@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
 			    const u8 point[CURVE25519_KEY_SIZE])
 {
 	if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
-		kernel_neon_begin();
-		curve25519_neon(out, scalar, point);
-		kernel_neon_end();
+		scoped_ksimd()
+			curve25519_neon(out, scalar, point);
 	} else {
 		curve25519_generic(out, scalar, point);
 	}

@@ -6,7 +6,6 @@
  */

 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 #include <linux/jump_label.h>
@@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
 		do {
 			unsigned int todo = min_t(unsigned int, len, SZ_4K);

-			kernel_neon_begin();
-			poly1305_blocks_neon(state, src, todo, padbit);
-			kernel_neon_end();
+			scoped_ksimd()
+				poly1305_blocks_neon(state, src, todo, padbit);

 			len -= todo;
 			src += todo;

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */
-#include <asm/neon.h>
 #include <asm/simd.h>

 static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state,
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_neon) && likely(may_use_simd())) {
-		kernel_neon_begin();
-		if (static_branch_likely(&have_ce))
-			sha1_ce_transform(state, data, nblocks);
-		else
-			sha1_transform_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd() {
+			if (static_branch_likely(&have_ce))
+				sha1_ce_transform(state, data, nblocks);
+			else
+				sha1_transform_neon(state, data, nblocks);
+		}
 	} else {
 		sha1_block_data_order(state, data, nblocks);
 	}

@@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state,
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_neon) && likely(may_use_simd())) {
-		kernel_neon_begin();
-		if (static_branch_likely(&have_ce))
-			sha256_ce_transform(state, data, nblocks);
-		else
-			sha256_block_data_order_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd() {
+			if (static_branch_likely(&have_ce))
+				sha256_ce_transform(state, data, nblocks);
+			else
+				sha256_block_data_order_neon(state, data, nblocks);
+		}
 	} else {
 		sha256_block_data_order(state, data, nblocks);
 	}

@@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state,
 {
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_neon) && likely(may_use_simd())) {
-		kernel_neon_begin();
-		sha512_block_data_order_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd()
+			sha512_block_data_order_neon(state, data, nblocks);
 	} else {
 		sha512_block_data_order(state, data, nblocks);
 	}

@@ -23,7 +23,6 @@
 #include <linux/kernel.h>

 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>

 asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
 	if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
 		hchacha_block_generic(state, out, nrounds);
 	} else {
-		kernel_neon_begin();
-		hchacha_block_neon(state, out, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			hchacha_block_neon(state, out, nrounds);
 	}
 }
@@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
 	do {
 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);

-		kernel_neon_begin();
-		chacha_doneon(state, dst, src, todo, nrounds);
-		kernel_neon_end();
+		scoped_ksimd()
+			chacha_doneon(state, dst, src, todo, nrounds);

 		bytes -= todo;
 		src += todo;

@@ -6,7 +6,6 @@
  */

 #include <asm/hwcap.h>
-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
 #include <linux/jump_label.h>
@@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
 		do {
 			unsigned int todo = min_t(unsigned int, len, SZ_4K);

-			kernel_neon_begin();
-			poly1305_blocks_neon(state, src, todo, padbit);
-			kernel_neon_end();
+			scoped_ksimd()
+				poly1305_blocks_neon(state, src, todo, padbit);

 			len -= todo;
 			src += todo;

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */

-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
@@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state,
 		do {
 			size_t rem;

-			kernel_neon_begin();
-			rem = __sha1_ce_transform(state, data, nblocks);
-			kernel_neon_end();
+			scoped_ksimd()
+				rem = __sha1_ce_transform(state, data, nblocks);
+
 			data += (nblocks - rem) * SHA1_BLOCK_SIZE;
 			nblocks = rem;
 		} while (nblocks);

@@ -4,7 +4,6 @@
  *
  * Copyright 2025 Google LLC
  */

-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
@@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state,
 		do {
 			size_t rem;

-			kernel_neon_begin();
-			rem = __sha256_ce_transform(state,
-						    data, nblocks);
-			kernel_neon_end();
+			scoped_ksimd()
+				rem = __sha256_ce_transform(state, data,
+							    nblocks);
+
 			data += (nblocks - rem) * SHA256_BLOCK_SIZE;
 			nblocks = rem;
 		} while (nblocks);
 	} else {
-		kernel_neon_begin();
-		sha256_block_neon(state, data, nblocks);
-		kernel_neon_end();
+		scoped_ksimd()
+			sha256_block_neon(state, data, nblocks);
 	}
 } else {
 	sha256_block_data_order(state, data, nblocks);
@@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
 	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
 	    static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
 	    len <= 65536 && likely(may_use_simd())) {
-		kernel_neon_begin();
-		sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
-		kernel_neon_end();
+		scoped_ksimd()
+			sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
 		kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
 		kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
 		return true;

@@ -4,7 +4,7 @@
  *
  * Copyright 2025 Google LLC
  */

-#include <asm/neon.h>
 #include <asm/simd.h>
 #include <linux/cpufeature.h>
@@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state,
 		do {
 			size_t rem;

-			kernel_neon_begin();
-			rem = __sha512_ce_transform(state, data, nblocks);
-			kernel_neon_end();
+			scoped_ksimd()
+				rem = __sha512_ce_transform(state, data, nblocks);
+
 			data += (nblocks - rem) * SHA512_BLOCK_SIZE;
 			nblocks = rem;
 		} while (nblocks);