arm64 FPSIMD buffer on-stack for 6.19

Merge tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull arm64 FPSIMD on-stack buffer updates from Eric Biggers:
 "This is a core arm64 change. However, I was asked to take this because
  most uses of kernel-mode FPSIMD are in crypto or CRC code.

  In v6.8, the size of task_struct on arm64 increased by 528 bytes due
  to the new 'kernel_fpsimd_state' field. This field was added to allow
  kernel-mode FPSIMD code to be preempted.

  Unfortunately, 528 bytes is kind of a lot for task_struct. This
  regression in the task_struct size was noticed and reported.

  Recover that space by making this state be allocated on the stack at
  the beginning of each kernel-mode FPSIMD section.

  To make it easier for all the users of kernel-mode FPSIMD to do that
  correctly, introduce and use a 'scoped_ksimd' abstraction"
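
For orientation, the conversion applied across the series follows one pattern. The sketch below is illustrative only (do_crypt_neon() is a hypothetical NEON helper, not something from the series), contrasting an arm64 caller before and after the switch to the scoped guard:

	#include <linux/types.h>
	#include <asm/simd.h>		/* may_use_simd(), scoped_ksimd() */

	/* Before the series: explicit bracketing of the SIMD region. */
	static void crypt_blocks_old(u8 *dst, const u8 *src, int blocks)
	{
		kernel_neon_begin();
		do_crypt_neon(dst, src, blocks);	/* hypothetical helper */
		kernel_neon_end();
	}

	/* After: scoped_ksimd() provides the FPSIMD save buffer on the
	 * caller's stack and ends the SIMD section automatically on every
	 * exit from the scope. */
	static void crypt_blocks_new(u8 *dst, const u8 *src, int blocks)
	{
		scoped_ksimd()
			do_crypt_neon(dst, src, blocks);
	}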

* tag 'fpsimd-on-stack-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (23 commits)
  lib/crypto: arm64: Move remaining algorithms to scoped ksimd API
  lib/crypto: arm/blake2b: Move to scoped ksimd API
  arm64/fpsimd: Allocate kernel mode FP/SIMD buffers on the stack
  arm64/fpu: Enforce task-context only for generic kernel mode FPU
  net/mlx5: Switch to more abstract scoped ksimd guard API on arm64
  arm64/xorblocks: Switch to 'ksimd' scoped guard API
  crypto/arm64: sm4 - Switch to 'ksimd' scoped guard API
  crypto/arm64: sm3 - Switch to 'ksimd' scoped guard API
  crypto/arm64: sha3 - Switch to 'ksimd' scoped guard API
  crypto/arm64: polyval - Switch to 'ksimd' scoped guard API
  crypto/arm64: nhpoly1305 - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-gcm - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-blk - Switch to 'ksimd' scoped guard API
  crypto/arm64: aes-ccm - Switch to 'ksimd' scoped guard API
  raid6: Move to more abstract 'ksimd' guard API
  crypto: aegis128-neon - Move to more abstract 'ksimd' guard API
  crypto/arm64: sm4-ce-gcm - Avoid pointless yield of the NEON unit
  crypto/arm64: sm4-ce-ccm - Avoid pointless yield of the NEON unit
  crypto/arm64: aes-ce-ccm - Avoid pointless yield of the NEON unit
  lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API
  ...
Commit f617d24606 by Linus Torvalds, 2025-12-02 18:53:50 -08:00
42 changed files with 614 additions and 709 deletions

View File

@ -2,14 +2,21 @@
#ifndef _ASM_SIMD_H
#define _ASM_SIMD_H
#include <linux/cleanup.h>
#include <linux/compiler_attributes.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <asm/neon.h>
static __must_check inline bool may_use_simd(void)
{
return IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && !in_hardirq()
&& !irqs_disabled();
}
DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
#define scoped_ksimd() scoped_guard(ksimd)
#endif /* _ASM_SIMD_H */
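
A minimal usage sketch of this 32-bit ARM flavour (xor_chunk_neon() and xor_chunk_scalar() are hypothetical helpers): since kernel_neon_begin()/kernel_neon_end() take no buffer argument here, the guard is simply a begin/end pair, and callers still gate entry on may_use_simd() with a scalar fallback:

	#include <asm/simd.h>

	static void xor_chunk(unsigned long *p1, const unsigned long *p2,
			      unsigned long bytes)
	{
		if (!may_use_simd()) {
			xor_chunk_scalar(p1, p2, bytes);	/* hypothetical fallback */
			return;
		}

		scoped_ksimd()
			xor_chunk_neon(p1, p2, bytes);	/* hypothetical NEON helper */
	}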

View File

@ -8,7 +8,6 @@
* Author: Ard Biesheuvel <ardb@kernel.org>
*/
#include <asm/neon.h>
#include <linux/unaligned.h>
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
@ -16,6 +15,8 @@
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
#include <asm/simd.h>
#include "aes-ce-setkey.h"
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
@ -114,11 +115,8 @@ static u32 ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
in += adv;
abytes -= adv;
if (unlikely(rem)) {
kernel_neon_end();
kernel_neon_begin();
if (unlikely(rem))
macp = 0;
}
} else {
u32 l = min(AES_BLOCK_SIZE - macp, abytes);
@ -187,8 +185,7 @@ static int ccm_encrypt(struct aead_request *req)
if (unlikely(err))
return err;
kernel_neon_begin();
scoped_ksimd() {
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
@ -219,8 +216,7 @@ static int ccm_encrypt(struct aead_request *req)
err = skcipher_walk_done(&walk, tail);
}
} while (walk.nbytes);
kernel_neon_end();
}
if (unlikely(err))
return err;
@ -254,8 +250,7 @@ static int ccm_decrypt(struct aead_request *req)
if (unlikely(err))
return err;
kernel_neon_begin();
scoped_ksimd() {
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
@ -286,8 +281,7 @@ static int ccm_decrypt(struct aead_request *req)
err = skcipher_walk_done(&walk, tail);
}
} while (walk.nbytes);
kernel_neon_end();
}
if (unlikely(err))
return err;

View File

@ -52,9 +52,8 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
kernel_neon_begin();
scoped_ksimd()
__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
kernel_neon_end();
}
static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
@ -66,9 +65,8 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
return;
}
kernel_neon_begin();
scoped_ksimd()
__aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
kernel_neon_end();
}
int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
@ -94,12 +92,13 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
for (i = 0; i < kwords; i++)
ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
kernel_neon_begin();
scoped_ksimd() {
for (i = 0; i < sizeof(rcon); i++) {
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^
rcon[i] ^ rki[0];
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
@ -120,10 +119,10 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
}
/*
* Generate the decryption keys for the Equivalent Inverse Cipher.
* This involves reversing the order of the round keys, and applying
* the Inverse Mix Columns transformation on all but the first and
* the last one.
* Generate the decryption keys for the Equivalent Inverse
* Cipher. This involves reversing the order of the round
* keys, and applying the Inverse Mix Columns transformation on
* all but the first and the last one.
*/
key_enc = (struct aes_block *)ctx->key_enc;
key_dec = (struct aes_block *)ctx->key_dec;
@ -133,8 +132,8 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
for (i = 1, j--; j > 0; i++, j--)
__aes_ce_invert(key_dec + i, key_enc + j);
key_dec[i] = key_enc[0];
}
kernel_neon_end();
return 0;
}
EXPORT_SYMBOL(ce_aes_expandkey);

View File

@ -5,8 +5,6 @@
* Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/internal/hash.h>
@ -20,6 +18,9 @@
#include <linux/module.h>
#include <linux/string.h>
#include <asm/hwcap.h>
#include <asm/simd.h>
#include "aes-ce-setkey.h"
#ifdef USE_V8_CRYPTO_EXTENSIONS
@ -186,10 +187,9 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
scoped_ksimd()
aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@ -206,10 +206,9 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
err = skcipher_walk_virt(&walk, req, false);
while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
scoped_ksimd()
aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@ -224,10 +223,9 @@ static int cbc_encrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
scoped_ksimd()
aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
ctx->key_enc, rounds, blocks, walk->iv);
kernel_neon_end();
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@ -253,10 +251,9 @@ static int cbc_decrypt_walk(struct skcipher_request *req,
unsigned int blocks;
while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
kernel_neon_begin();
scoped_ksimd()
aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
ctx->key_dec, rounds, blocks, walk->iv);
kernel_neon_end();
err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
}
return err;
@ -322,10 +319,9 @@ static int cts_cbc_encrypt(struct skcipher_request *req)
if (err)
return err;
kernel_neon_begin();
scoped_ksimd()
aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_enc, rounds, walk.nbytes, walk.iv);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
@ -379,10 +375,9 @@ static int cts_cbc_decrypt(struct skcipher_request *req)
if (err)
return err;
kernel_neon_begin();
scoped_ksimd()
aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key_dec, rounds, walk.nbytes, walk.iv);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
@ -399,11 +394,11 @@ static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
kernel_neon_begin();
aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
scoped_ksimd()
aes_essiv_cbc_encrypt(walk.dst.virt.addr,
walk.src.virt.addr,
ctx->key1.key_enc, rounds, blocks,
req->iv, ctx->key2.key_enc);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_encrypt_walk(req, &walk);
@ -421,11 +416,11 @@ static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
blocks = walk.nbytes / AES_BLOCK_SIZE;
if (blocks) {
kernel_neon_begin();
aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
scoped_ksimd()
aes_essiv_cbc_decrypt(walk.dst.virt.addr,
walk.src.virt.addr,
ctx->key1.key_dec, rounds, blocks,
req->iv, ctx->key2.key_enc);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err ?: cbc_decrypt_walk(req, &walk);
@ -461,10 +456,9 @@ static int __maybe_unused xctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
scoped_ksimd()
aes_xctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
walk.iv, byte_ctr);
kernel_neon_end();
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@ -506,10 +500,9 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req)
else if (nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
scoped_ksimd()
aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
walk.iv);
kernel_neon_end();
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
@ -562,11 +555,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
scoped_ksimd()
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_enc, rounds, nbytes,
ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@ -584,11 +576,10 @@ static int __maybe_unused xts_encrypt(struct skcipher_request *req)
if (err)
return err;
kernel_neon_begin();
scoped_ksimd()
aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_enc, rounds, walk.nbytes,
ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
@ -634,11 +625,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
if (walk.nbytes < walk.total)
nbytes &= ~(AES_BLOCK_SIZE - 1);
kernel_neon_begin();
scoped_ksimd()
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_dec, rounds, nbytes,
ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@ -657,11 +647,10 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req)
return err;
kernel_neon_begin();
scoped_ksimd()
aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key1.key_dec, rounds, walk.nbytes,
ctx->key2.key_enc, walk.iv, first);
kernel_neon_end();
return skcipher_walk_done(&walk, 0);
}
@ -808,10 +797,9 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
return err;
/* encrypt the zero vector */
kernel_neon_begin();
aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
rounds, 1);
kernel_neon_end();
scoped_ksimd()
aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){},
ctx->key.key_enc, rounds, 1);
cmac_gf128_mul_by_x(consts, consts);
cmac_gf128_mul_by_x(consts + 1, consts);
@ -837,10 +825,10 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
if (err)
return err;
kernel_neon_begin();
scoped_ksimd() {
aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
kernel_neon_end();
}
return cbcmac_setkey(tfm, key, sizeof(key));
}
@ -860,10 +848,9 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
int rem;
do {
kernel_neon_begin();
scoped_ksimd()
rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
dg, enc_before, !enc_before);
kernel_neon_end();
in += (blocks - rem) * AES_BLOCK_SIZE;
blocks = rem;
} while (blocks);

View File

@ -85,9 +85,8 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
ctx->rounds = 6 + key_len / 4;
kernel_neon_begin();
scoped_ksimd()
aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds);
kernel_neon_end();
return 0;
}
@ -110,10 +109,9 @@ static int __ecb_crypt(struct skcipher_request *req,
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
kernel_neon_begin();
scoped_ksimd()
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
ctx->rounds, blocks);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@ -146,9 +144,8 @@ static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc));
kernel_neon_begin();
scoped_ksimd()
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
kernel_neon_end();
memzero_explicit(&rk, sizeof(rk));
return 0;
@ -167,11 +164,11 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
/* fall back to the non-bitsliced NEON implementation */
kernel_neon_begin();
neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
scoped_ksimd()
neon_aes_cbc_encrypt(walk.dst.virt.addr,
walk.src.virt.addr,
ctx->enc, ctx->key.rounds, blocks,
walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
return err;
@ -193,11 +190,10 @@ static int cbc_decrypt(struct skcipher_request *req)
blocks = round_down(blocks,
walk.stride / AES_BLOCK_SIZE);
kernel_neon_begin();
scoped_ksimd()
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
ctx->key.rk, ctx->key.rounds, blocks,
walk.iv);
kernel_neon_end();
err = skcipher_walk_done(&walk,
walk.nbytes - blocks * AES_BLOCK_SIZE);
}
@ -220,10 +216,11 @@ static int ctr_encrypt(struct skcipher_request *req)
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
kernel_neon_begin();
scoped_ksimd() {
if (blocks >= 8) {
aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds,
blocks, walk.iv);
aesbs_ctr_encrypt(dst, src, ctx->key.rk,
ctx->key.rounds, blocks,
walk.iv);
dst += blocks * AES_BLOCK_SIZE;
src += blocks * AES_BLOCK_SIZE;
}
@ -232,18 +229,19 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *d = dst;
if (unlikely(nbytes < AES_BLOCK_SIZE))
src = dst = memcpy(buf + sizeof(buf) - nbytes,
src, nbytes);
src = dst = memcpy(buf + sizeof(buf) -
nbytes, src, nbytes);
neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds,
nbytes, walk.iv);
neon_aes_ctr_encrypt(dst, src, ctx->enc,
ctx->key.rounds, nbytes,
walk.iv);
if (unlikely(nbytes < AES_BLOCK_SIZE))
memcpy(d, dst, nbytes);
nbytes = 0;
}
kernel_neon_end();
}
err = skcipher_walk_done(&walk, nbytes);
}
return err;
@ -320,7 +318,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
kernel_neon_begin();
scoped_ksimd() {
if (blocks >= 8) {
if (first == 1)
neon_aes_ecb_encrypt(walk.iv, walk.iv,
@ -346,7 +344,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
ctx->twkey, walk.iv, first);
nbytes = first = 0;
}
kernel_neon_end();
}
err = skcipher_walk_done(&walk, nbytes);
}
@ -369,14 +367,16 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
in = walk.src.virt.addr;
nbytes = walk.nbytes;
kernel_neon_begin();
scoped_ksimd() {
if (encrypt)
neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
nbytes, ctx->twkey, walk.iv, first);
neon_aes_xts_encrypt(out, in, ctx->cts.key_enc,
ctx->key.rounds, nbytes, ctx->twkey,
walk.iv, first);
else
neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
nbytes, ctx->twkey, walk.iv, first);
kernel_neon_end();
neon_aes_xts_decrypt(out, in, ctx->cts.key_dec,
ctx->key.rounds, nbytes, ctx->twkey,
walk.iv, first);
}
return skcipher_walk_done(&walk, 0);
}

View File

@ -5,7 +5,6 @@
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
#include <asm/neon.h>
#include <crypto/aes.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
@ -22,6 +21,8 @@
#include <linux/string.h>
#include <linux/unaligned.h>
#include <asm/simd.h>
MODULE_DESCRIPTION("GHASH and AES-GCM using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@ -74,9 +75,8 @@ void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
u64 const h[][2],
const char *head))
{
kernel_neon_begin();
scoped_ksimd()
simd_update(blocks, dg, src, key->h, head);
kernel_neon_end();
}
/* avoid hogging the CPU for too long */
@ -329,11 +329,10 @@ static int gcm_encrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
kernel_neon_begin();
scoped_ksimd()
pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
dg, iv, ctx->aes_key.key_enc, nrounds,
tag);
kernel_neon_end();
if (unlikely(!nbytes))
break;
@ -399,11 +398,11 @@ static int gcm_decrypt(struct aead_request *req, char *iv, int assoclen)
tag = NULL;
}
kernel_neon_begin();
ret = pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
scoped_ksimd()
ret = pmull_gcm_decrypt(nbytes, dst, src,
ctx->ghash_key.h,
dg, iv, ctx->aes_key.key_enc,
nrounds, tag, otag, authsize);
kernel_neon_end();
if (unlikely(!nbytes))
break;

View File

@ -25,9 +25,8 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
do {
unsigned int n = min_t(unsigned int, srclen, SZ_4K);
kernel_neon_begin();
scoped_ksimd()
crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
kernel_neon_end();
src += n;
srclen -= n;
} while (srclen);

View File

@ -5,7 +5,6 @@
* Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <asm/neon.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@ -13,6 +12,8 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/simd.h>
MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
@ -25,18 +26,18 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
{
int remain;
kernel_neon_begin();
scoped_ksimd() {
remain = sm3_base_do_update_blocks(desc, data, len, sm3_ce_transform);
kernel_neon_end();
}
return remain;
}
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
kernel_neon_begin();
scoped_ksimd() {
sm3_base_do_finup(desc, data, len, sm3_ce_transform);
kernel_neon_end();
}
return sm3_base_finish(desc, out);
}

View File

@ -5,7 +5,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
@ -20,20 +20,16 @@ asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
int remain;
kernel_neon_begin();
remain = sm3_base_do_update_blocks(desc, data, len, sm3_neon_transform);
kernel_neon_end();
return remain;
scoped_ksimd()
return sm3_base_do_update_blocks(desc, data, len,
sm3_neon_transform);
}
static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
kernel_neon_begin();
scoped_ksimd()
sm3_base_do_finup(desc, data, len, sm3_neon_transform);
kernel_neon_end();
return sm3_base_finish(desc, out);
}

View File

@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
@ -35,10 +35,9 @@ static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
scoped_ksimd()
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
@ -167,39 +166,23 @@ static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
crypto_inc(walk->iv, SM4_BLOCK_SIZE);
kernel_neon_begin();
scoped_ksimd() {
if (req->assoclen)
ccm_calculate_auth_mac(req, mac);
while (walk->nbytes && walk->nbytes != walk->total) {
while (walk->nbytes) {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
if (walk->nbytes == walk->total)
tail = 0;
sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
walk->src.virt.addr, walk->iv,
walk->nbytes - tail, mac);
kernel_neon_end();
err = skcipher_walk_done(walk, tail);
kernel_neon_begin();
}
if (walk->nbytes) {
sm4_ce_ccm_crypt(rkey_enc, walk->dst.virt.addr,
walk->src.virt.addr, walk->iv,
walk->nbytes, mac);
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
kernel_neon_end();
err = skcipher_walk_done(walk, 0);
} else {
sm4_ce_ccm_final(rkey_enc, ctr0, mac);
kernel_neon_end();
}
return err;

View File

@ -32,9 +32,8 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_enc, out, in);
} else {
kernel_neon_begin();
scoped_ksimd()
sm4_ce_do_crypt(ctx->rkey_enc, out, in);
kernel_neon_end();
}
}
@ -45,9 +44,8 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
if (!crypto_simd_usable()) {
sm4_crypt_block(ctx->rkey_dec, out, in);
} else {
kernel_neon_begin();
scoped_ksimd()
sm4_ce_do_crypt(ctx->rkey_dec, out, in);
kernel_neon_end();
}
}

View File

@ -11,7 +11,7 @@
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
@ -48,13 +48,11 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
scoped_ksimd() {
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
kernel_neon_end();
}
return 0;
}
@ -149,44 +147,28 @@ static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);
kernel_neon_begin();
scoped_ksimd() {
if (req->assoclen)
gcm_calculate_auth_mac(req, ghash);
while (walk->nbytes) {
do {
unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
const u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
const u8 *l = NULL;
if (walk->nbytes == walk->total) {
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes, ghash,
ctx->ghash_table,
(const u8 *)&lengths);
kernel_neon_end();
return skcipher_walk_done(walk, 0);
l = (const u8 *)&lengths;
tail = 0;
}
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
walk->nbytes - tail, ghash,
ctx->ghash_table, NULL);
kernel_neon_end();
ctx->ghash_table, l);
err = skcipher_walk_done(walk, tail);
kernel_neon_begin();
} while (walk->nbytes);
}
sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, NULL, NULL, iv,
walk->nbytes, ghash, ctx->ghash_table,
(const u8 *)&lengths);
kernel_neon_end();
return err;
}

View File

@ -8,7 +8,7 @@
* Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/b128ops.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
@ -74,10 +74,9 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
scoped_ksimd()
sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
@ -94,12 +93,12 @@ static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
if (ret)
return ret;
kernel_neon_begin();
scoped_ksimd() {
sm4_ce_expand_key(key, ctx->key1.rkey_enc,
ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
}
return 0;
}
@ -117,15 +116,13 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
scoped_ksimd() {
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_crypt(rkey, dst, src, nblks);
nbytes -= nblks * SM4_BLOCK_SIZE;
}
kernel_neon_end();
}
err = skcipher_walk_done(&walk, nbytes);
}
@ -167,16 +164,14 @@ static int sm4_cbc_crypt(struct skcipher_request *req,
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
scoped_ksimd() {
if (encrypt)
sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
walk.iv, nblocks);
else
sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
walk.iv, nblocks);
kernel_neon_end();
}
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@ -249,16 +244,14 @@ static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
kernel_neon_begin();
scoped_ksimd() {
if (encrypt)
sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes);
else
sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes);
kernel_neon_end();
}
return skcipher_walk_done(&walk, 0);
}
@ -288,8 +281,7 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
u8 *dst = walk.dst.virt.addr;
unsigned int nblks;
kernel_neon_begin();
scoped_ksimd() {
nblks = BYTES2BLKS(nbytes);
if (nblks) {
sm4_ce_ctr_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
@ -307,8 +299,7 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
crypto_xor_cpy(dst, src, keystream, nbytes);
nbytes = 0;
}
kernel_neon_end();
}
err = skcipher_walk_done(&walk, nbytes);
}
@ -359,8 +350,7 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (nbytes < walk.total)
nbytes &= ~(SM4_BLOCK_SIZE - 1);
kernel_neon_begin();
scoped_ksimd() {
if (encrypt)
sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, nbytes,
@ -369,8 +359,7 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, nbytes,
rkey2_enc);
kernel_neon_end();
}
rkey2_enc = NULL;
@ -395,8 +384,7 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
if (err)
return err;
kernel_neon_begin();
scoped_ksimd() {
if (encrypt)
sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes,
@ -405,8 +393,7 @@ static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
walk.src.virt.addr, walk.iv, walk.nbytes,
rkey2_enc);
kernel_neon_end();
}
return skcipher_walk_done(&walk, 0);
}
@ -510,11 +497,9 @@ static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
scoped_ksimd()
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
return 0;
}
@ -530,15 +515,13 @@ static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
memset(consts, 0, SM4_BLOCK_SIZE);
kernel_neon_begin();
scoped_ksimd() {
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
/* encrypt the zero block */
sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
kernel_neon_end();
}
/* gf(2^128) multiply zero-ciphertext with u and u^2 */
a = be64_to_cpu(consts[0].a);
@ -568,8 +551,7 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
if (key_len != SM4_KEY_SIZE)
return -EINVAL;
kernel_neon_begin();
scoped_ksimd() {
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
@ -578,8 +560,7 @@ static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
crypto_sm4_fk, crypto_sm4_ck);
kernel_neon_end();
}
return 0;
}
@ -600,10 +581,9 @@ static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
unsigned int nblocks = len / SM4_BLOCK_SIZE;
len %= SM4_BLOCK_SIZE;
kernel_neon_begin();
scoped_ksimd()
sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
nblocks, false, true);
kernel_neon_end();
return len;
}
@ -619,10 +599,9 @@ static int sm4_cmac_finup(struct shash_desc *desc, const u8 *src,
ctx->digest[len] ^= 0x80;
consts += SM4_BLOCK_SIZE;
}
kernel_neon_begin();
scoped_ksimd()
sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
false, true);
kernel_neon_end();
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;
}
@ -635,10 +614,9 @@ static int sm4_cbcmac_finup(struct shash_desc *desc, const u8 *src,
if (len) {
crypto_xor(ctx->digest, src, len);
kernel_neon_begin();
scoped_ksimd()
sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
ctx->digest);
kernel_neon_end();
}
memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
return 0;

View File

@ -48,11 +48,8 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
scoped_ksimd()
sm4_neon_crypt(rkey, dst, src, nblocks);
kernel_neon_end();
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@ -126,12 +123,9 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
scoped_ksimd()
sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
walk.iv, nblocks);
kernel_neon_end();
}
err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
@ -157,13 +151,10 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
nblocks = nbytes / SM4_BLOCK_SIZE;
if (nblocks) {
kernel_neon_begin();
scoped_ksimd()
sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
walk.iv, nblocks);
kernel_neon_end();
dst += nblocks * SM4_BLOCK_SIZE;
src += nblocks * SM4_BLOCK_SIZE;
nbytes -= nblocks * SM4_BLOCK_SIZE;

View File

@ -6,10 +6,22 @@
#ifndef __ASM_FPU_H
#define __ASM_FPU_H
#include <linux/preempt.h>
#include <asm/neon.h>
#define kernel_fpu_available() cpu_has_neon()
#define kernel_fpu_begin() kernel_neon_begin()
#define kernel_fpu_end() kernel_neon_end()
static inline void kernel_fpu_begin(void)
{
BUG_ON(!in_task());
preempt_disable();
kernel_neon_begin(NULL);
}
static inline void kernel_fpu_end(void)
{
kernel_neon_end(NULL);
preempt_enable();
}
#endif /* ! __ASM_FPU_H */
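
A note on the NULL argument above: because this generic wrapper now insists on task context and disables preemption, the task can never be scheduled out while the FP/SIMD registers are live, so no save buffer is needed and NULL is acceptable. A hedged usage sketch from the caller's side (compute_fpu() is a hypothetical helper):

	#include <asm/fpu.h>

	static void do_fp_work(void)
	{
		if (!kernel_fpu_available())
			return;

		kernel_fpu_begin();	/* task context only; preemption is disabled */
		compute_fpu();		/* hypothetical FP/SIMD computation */
		kernel_fpu_end();
	}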

View File

@ -13,7 +13,7 @@
#define cpu_has_neon() system_supports_fpsimd()
void kernel_neon_begin(void);
void kernel_neon_end(void);
void kernel_neon_begin(struct user_fpsimd_state *);
void kernel_neon_end(struct user_fpsimd_state *);
#endif /* ! __ASM_NEON_H */

View File

@ -172,7 +172,12 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
struct user_fpsimd_state kernel_fpsimd_state;
/*
* Set [cleared] by kernel_neon_begin() [kernel_neon_end()] to the
* address of a caller provided buffer that will be used to preserve a
* task's kernel mode FPSIMD state while it is scheduled out.
*/
struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;

View File

@ -6,12 +6,15 @@
#ifndef __ASM_SIMD_H
#define __ASM_SIMD_H
#include <linux/cleanup.h>
#include <linux/compiler.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/types.h>
#include <asm/neon.h>
#ifdef CONFIG_KERNEL_MODE_NEON
/*
@ -40,4 +43,11 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
DEFINE_LOCK_GUARD_1(ksimd,
struct user_fpsimd_state,
kernel_neon_begin(_T->lock),
kernel_neon_end(_T->lock))
#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
#endif
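
Conceptually, each scoped_ksimd() section on arm64 now behaves like the open-coded sketch below (illustrative only; the real guard plumbing comes from <linux/cleanup.h>, which also guarantees kernel_neon_end() runs on every exit path). The save buffer is a compound literal on the caller's stack, replacing the 528 bytes that used to live in task_struct:

	static void example_simd_section(void)	/* hypothetical caller */
	{
		struct user_fpsimd_state buf = {};	/* lives in this stack frame */

		kernel_neon_begin(&buf);
		/* ... NEON/FPSIMD work; 'buf' preserves the kernel-mode state if
		 * the task is scheduled out, or is interrupted by kernel-mode
		 * SIMD in softirq context ... */
		kernel_neon_end(&buf);
	}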

View File

@ -9,7 +9,7 @@
#include <linux/hardirq.h>
#include <asm-generic/xor.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#ifdef CONFIG_KERNEL_MODE_NEON
@ -19,9 +19,8 @@ static void
xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2)
{
kernel_neon_begin();
scoped_ksimd()
xor_block_inner_neon.do_2(bytes, p1, p2);
kernel_neon_end();
}
static void
@ -29,9 +28,8 @@ xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p2,
const unsigned long * __restrict p3)
{
kernel_neon_begin();
scoped_ksimd()
xor_block_inner_neon.do_3(bytes, p1, p2, p3);
kernel_neon_end();
}
static void
@ -40,9 +38,8 @@ xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p3,
const unsigned long * __restrict p4)
{
kernel_neon_begin();
scoped_ksimd()
xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
kernel_neon_end();
}
static void
@ -52,9 +49,8 @@ xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
const unsigned long * __restrict p4,
const unsigned long * __restrict p5)
{
kernel_neon_begin();
scoped_ksimd()
xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
kernel_neon_end();
}
static struct xor_block_template xor_block_arm64 = {

View File

@ -1502,21 +1502,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
if (last->st == &task->thread.kernel_fpsimd_state &&
if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
fpsimd_load_state(&task->thread.kernel_fpsimd_state);
fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
.st = &task->thread.kernel_fpsimd_state,
.st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
fpsimd_save_state(&task->thread.kernel_fpsimd_state);
BUG_ON(!cpu_fp_state.st);
fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@ -1787,6 +1789,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@ -1846,12 +1849,19 @@ void fpsimd_save_and_flush_cpu_state(void)
*
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*
* Unless called from non-preemptible task context, @state must point to a
* caller provided buffer that will be used to preserve the task's kernel mode
* FPSIMD context when it is scheduled out, or if it is interrupted by kernel
* mode FPSIMD occurring in softirq context. May be %NULL otherwise.
*/
void kernel_neon_begin(void)
void kernel_neon_begin(struct user_fpsimd_state *state)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
WARN_ON((preemptible() || in_serving_softirq()) && !state);
BUG_ON(!may_use_simd());
get_cpu_fpsimd_context();
@ -1859,7 +1869,7 @@ void kernel_neon_begin(void)
/* Save unsaved fpsimd state, if any: */
if (test_thread_flag(TIF_KERNEL_FPSTATE)) {
BUG_ON(IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq());
fpsimd_save_kernel_state(current);
fpsimd_save_state(state);
} else {
fpsimd_save_user_state();
@ -1880,9 +1890,17 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
/*
* Record the caller provided buffer as the kernel mode
* FP/SIMD buffer for this task, so that the state can
* be preserved and restored on a context switch.
*/
WARN_ON(current->thread.kernel_fpsimd_state != NULL);
current->thread.kernel_fpsimd_state = state;
set_thread_flag(TIF_KERNEL_FPSTATE);
}
}
/* Invalidate any task state remaining in the fpsimd regs: */
fpsimd_flush_cpu_state();
@ -1899,22 +1917,30 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
*
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
*
* The value of @state must match the value passed to the preceding call to
* kernel_neon_begin().
*/
void kernel_neon_end(void)
void kernel_neon_end(struct user_fpsimd_state *state)
{
if (!system_supports_fpsimd())
return;
if (!test_thread_flag(TIF_KERNEL_FPSTATE))
return;
/*
* If we are returning from a nested use of kernel mode FPSIMD, restore
* the task context kernel mode FPSIMD state. This can only happen when
* running in softirq context on non-PREEMPT_RT.
*/
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
test_thread_flag(TIF_KERNEL_FPSTATE))
fpsimd_load_kernel_state(current);
else
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq()) {
fpsimd_load_state(state);
} else {
clear_thread_flag(TIF_KERNEL_FPSTATE);
WARN_ON(current->thread.kernel_fpsimd_state != state);
current->thread.kernel_fpsimd_state = NULL;
}
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
@ -1948,7 +1974,7 @@ void __efi_fpsimd_begin(void)
return;
if (may_use_simd()) {
kernel_neon_begin();
kernel_neon_begin(&efi_fpsimd_state);
} else {
WARN_ON(preemptible());
@ -1999,7 +2025,7 @@ void __efi_fpsimd_end(void)
return;
if (!efi_fpsimd_state_used) {
kernel_neon_end();
kernel_neon_end(&efi_fpsimd_state);
} else {
if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
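
Callers that cannot wrap their SIMD section in a single lexical scope (the EFI runtime wrappers above are the in-tree example) instead keep a buffer whose lifetime spans the section and pass the same pointer to both calls, as the kerneldoc above requires. A minimal sketch with hypothetical names; serialising concurrent users of the buffer is the caller's responsibility, as it is for efi_fpsimd_state:

	static struct user_fpsimd_state my_fpsimd_buf;	/* hypothetical long-lived buffer */

	static void my_simd_section_begin(void)
	{
		kernel_neon_begin(&my_fpsimd_buf);
	}

	static void my_simd_section_end(void)
	{
		/* Must match the pointer passed to kernel_neon_begin(). */
		kernel_neon_end(&my_fpsimd_buf);
	}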

View File

@ -4,7 +4,7 @@
*/
#include <asm/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include "aegis.h"
#include "aegis-neon.h"
@ -24,32 +24,28 @@ void crypto_aegis128_init_simd(struct aegis_state *state,
const union aegis_block *key,
const u8 *iv)
{
kernel_neon_begin();
scoped_ksimd()
crypto_aegis128_init_neon(state, key, iv);
kernel_neon_end();
}
void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg)
{
kernel_neon_begin();
scoped_ksimd()
crypto_aegis128_update_neon(state, msg);
kernel_neon_end();
}
void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
kernel_neon_begin();
scoped_ksimd()
crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
kernel_neon_end();
}
void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
const u8 *src, unsigned int size)
{
kernel_neon_begin();
scoped_ksimd()
crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
kernel_neon_end();
}
int crypto_aegis128_final_simd(struct aegis_state *state,
@ -58,12 +54,7 @@ int crypto_aegis128_final_simd(struct aegis_state *state,
unsigned int cryptlen,
unsigned int authsize)
{
int ret;
kernel_neon_begin();
ret = crypto_aegis128_final_neon(state, tag_xor, assoclen, cryptlen,
authsize);
kernel_neon_end();
return ret;
scoped_ksimd()
return crypto_aegis128_final_neon(state, tag_xor, assoclen,
cryptlen, authsize);
}

View File

@ -9,6 +9,7 @@
#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64)
#include <asm/neon.h>
#include <asm/simd.h>
#endif
#define TEST_WC_NUM_WQES 255
@ -264,15 +265,15 @@ static void mlx5_iowrite64_copy(struct mlx5_wc_sq *sq, __be32 mmio_wqe[16],
{
#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64)
if (cpu_has_neon()) {
kernel_neon_begin();
asm volatile
(".arch_extension simd\n\t"
scoped_ksimd() {
asm volatile(
".arch_extension simd\n\t"
"ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t"
"st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]"
:
: "r"(mmio_wqe), "r"(sq->bfreg.map + offset)
: "memory", "v0", "v1", "v2", "v3");
kernel_neon_end();
}
return;
}
#endif

View File

@ -5,7 +5,6 @@
* Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
*/
#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len,
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
if (likely(may_use_simd())) {
kernel_neon_begin();
crc = crc_t10dif_pmull64(crc, data, length);
kernel_neon_end();
return crc;
}
scoped_ksimd()
return crc_t10dif_pmull64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
static_branch_likely(&have_neon) &&
likely(may_use_simd())) {
static_branch_likely(&have_neon)) {
u8 buf[16] __aligned(16);
kernel_neon_begin();
scoped_ksimd()
crc_t10dif_pmull8(crc, data, length, buf);
kernel_neon_end();
return crc_t10dif_generic(0, buf, sizeof(buf));
}

View File

@ -8,7 +8,6 @@
#include <linux/cpufeature.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32);
@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
kernel_neon_begin();
scoped_ksimd()
crc = crc32_pmull_le(p, n, crc);
kernel_neon_end();
p += n;
len -= n;
}
@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
len -= n;
}
n = round_down(len, 16);
kernel_neon_begin();
scoped_ksimd()
crc = crc32c_pmull_le(p, n, crc);
kernel_neon_end();
p += n;
len -= n;
}

View File

@ -7,7 +7,6 @@
#include <linux/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd);
@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length)
{
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) {
if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) {
if (static_branch_likely(&have_pmull)) {
if (likely(may_use_simd())) {
kernel_neon_begin();
crc = crc_t10dif_pmull_p64(crc, data, length);
kernel_neon_end();
return crc;
}
scoped_ksimd()
return crc_t10dif_pmull_p64(crc, data, length);
} else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE &&
static_branch_likely(&have_asimd) &&
likely(may_use_simd())) {
static_branch_likely(&have_asimd)) {
u8 buf[16];
kernel_neon_begin();
scoped_ksimd()
crc_t10dif_pmull_p8(crc, data, length, buf);
kernel_neon_end();
return crc_t10dif_generic(0, buf, sizeof(buf));
}

View File

@ -2,7 +2,6 @@
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/neon.h>
#include <asm/simd.h>
// The minimum input length to consider the 4-way interleaved code path
@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd()
crc = crc32_le_arm64_4way(crc, p, len);
kernel_neon_end();
p += round_down(len, 64);
len %= 64;
@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd()
crc = crc32c_le_arm64_4way(crc, p, len);
kernel_neon_end();
p += round_down(len, 64);
len %= 64;
@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
if (len >= min_len && cpu_have_named_feature(PMULL) &&
likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd()
crc = crc32_be_arm64_4way(crc, p, len);
kernel_neon_end();
p += round_down(len, 64);
len %= 64;

View File

@ -24,9 +24,8 @@ static void blake2b_compress(struct blake2b_ctx *ctx,
const size_t blocks = min_t(size_t, nblocks,
SZ_4K / BLAKE2B_BLOCK_SIZE);
kernel_neon_begin();
scoped_ksimd()
blake2b_compress_neon(ctx, data, blocks, inc);
kernel_neon_end();
data += blocks * BLAKE2B_BLOCK_SIZE;
nblocks -= blocks;

View File

@ -12,7 +12,6 @@
#include <asm/cputype.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@ -68,9 +67,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
hchacha_block_arm(state, out, nrounds);
} else {
kernel_neon_begin();
scoped_ksimd()
hchacha_block_neon(state, out, nrounds);
kernel_neon_end();
}
}
@ -87,9 +85,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
scoped_ksimd()
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;

View File

@ -25,9 +25,8 @@ static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
const u8 point[CURVE25519_KEY_SIZE])
{
if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
kernel_neon_begin();
scoped_ksimd()
curve25519_neon(out, scalar, point);
kernel_neon_end();
} else {
curve25519_generic(out, scalar, point);
}

View File

@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@ -32,9 +31,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
scoped_ksimd()
poly1305_blocks_neon(state, src, todo, padbit);
kernel_neon_end();
len -= todo;
src += todo;

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
@ -22,12 +21,12 @@ static void sha1_blocks(struct sha1_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd() {
if (static_branch_likely(&have_ce))
sha1_ce_transform(state, data, nblocks);
else
sha1_transform_neon(state, data, nblocks);
kernel_neon_end();
}
} else {
sha1_block_data_order(state, data, nblocks);
}

View File

@ -22,12 +22,12 @@ static void sha256_blocks(struct sha256_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd() {
if (static_branch_likely(&have_ce))
sha256_ce_transform(state, data, nblocks);
else
sha256_block_data_order_neon(state, data, nblocks);
kernel_neon_end();
}
} else {
sha256_block_data_order(state, data, nblocks);
}

View File

@ -19,9 +19,8 @@ static void sha512_blocks(struct sha512_block_state *state,
{
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_neon) && likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd()
sha512_block_data_order_neon(state, data, nblocks);
kernel_neon_end();
} else {
sha512_block_data_order(state, data, nblocks);
}

View File

@ -23,7 +23,6 @@
#include <linux/kernel.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
@ -65,9 +64,8 @@ static void hchacha_block_arch(const struct chacha_state *state,
if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
hchacha_block_generic(state, out, nrounds);
} else {
kernel_neon_begin();
scoped_ksimd()
hchacha_block_neon(state, out, nrounds);
kernel_neon_end();
}
}
@ -81,9 +79,8 @@ static void chacha_crypt_arch(struct chacha_state *state, u8 *dst,
do {
unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
kernel_neon_begin();
scoped_ksimd()
chacha_doneon(state, dst, src, todo, nrounds);
kernel_neon_end();
bytes -= todo;
src += todo;

View File

@ -6,7 +6,6 @@
*/
#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
#include <linux/jump_label.h>
@ -31,9 +30,8 @@ static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src,
do {
unsigned int todo = min_t(unsigned int, len, SZ_4K);
kernel_neon_begin();
scoped_ksimd()
poly1305_blocks_neon(state, src, todo, padbit);
kernel_neon_end();
len -= todo;
src += todo;

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -24,13 +23,14 @@ static void polyval_preparekey_arch(struct polyval_key *key,
static_assert(ARRAY_SIZE(key->h_powers) == NUM_H_POWERS);
memcpy(&key->h_powers[NUM_H_POWERS - 1], raw_key, POLYVAL_BLOCK_SIZE);
if (static_branch_likely(&have_pmull) && may_use_simd()) {
kernel_neon_begin();
scoped_ksimd() {
for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
key->h_powers[i] = key->h_powers[i + 1];
polyval_mul_pmull(&key->h_powers[i],
polyval_mul_pmull(
&key->h_powers[i],
&key->h_powers[NUM_H_POWERS - 1]);
}
kernel_neon_end();
}
} else {
for (int i = NUM_H_POWERS - 2; i >= 0; i--) {
key->h_powers[i] = key->h_powers[i + 1];
@ -44,9 +44,8 @@ static void polyval_mul_arch(struct polyval_elem *acc,
const struct polyval_key *key)
{
if (static_branch_likely(&have_pmull) && may_use_simd()) {
kernel_neon_begin();
scoped_ksimd()
polyval_mul_pmull(acc, &key->h_powers[NUM_H_POWERS - 1]);
kernel_neon_end();
} else {
polyval_mul_generic(acc, &key->h_powers[NUM_H_POWERS - 1]);
}
@ -62,9 +61,8 @@ static void polyval_blocks_arch(struct polyval_elem *acc,
size_t n = min_t(size_t, nblocks,
4096 / POLYVAL_BLOCK_SIZE);
kernel_neon_begin();
scoped_ksimd()
polyval_blocks_pmull(acc, key, data, n);
kernel_neon_end();
data += n * POLYVAL_BLOCK_SIZE;
nblocks -= n;
} while (nblocks);

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -20,9 +19,9 @@ static void sha1_blocks(struct sha1_block_state *state,
do {
size_t rem;
kernel_neon_begin();
scoped_ksimd()
rem = __sha1_ce_transform(state, data, nblocks);
kernel_neon_end();
data += (nblocks - rem) * SHA1_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);

View File

@ -4,7 +4,6 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -27,17 +26,16 @@ static void sha256_blocks(struct sha256_block_state *state,
do {
size_t rem;
kernel_neon_begin();
rem = __sha256_ce_transform(state,
data, nblocks);
kernel_neon_end();
scoped_ksimd()
rem = __sha256_ce_transform(state, data,
nblocks);
data += (nblocks - rem) * SHA256_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);
} else {
kernel_neon_begin();
scoped_ksimd()
sha256_block_neon(state, data, nblocks);
kernel_neon_end();
}
} else {
sha256_block_data_order(state, data, nblocks);
@ -66,9 +64,8 @@ static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx,
if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE &&
len <= 65536 && likely(may_use_simd())) {
kernel_neon_begin();
scoped_ksimd()
sha256_ce_finup2x(ctx, data1, data2, len, out1, out2);
kernel_neon_end();
kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE);
kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE);
return true;

View File

@ -7,7 +7,6 @@
* published by the Free Software Foundation.
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -23,10 +22,9 @@ static void sha3_absorb_blocks(struct sha3_state *state, const u8 *data,
do {
size_t rem;
kernel_neon_begin();
scoped_ksimd()
rem = sha3_ce_transform(state, data, nblocks,
block_size);
kernel_neon_end();
data += (nblocks - rem) * block_size;
nblocks = rem;
} while (nblocks);
@ -46,9 +44,8 @@ static void sha3_keccakf(struct sha3_state *state)
*/
static const u8 zeroes[SHA3_512_BLOCK_SIZE];
kernel_neon_begin();
scoped_ksimd()
sha3_ce_transform(state, zeroes, 1, sizeof(zeroes));
kernel_neon_end();
} else {
sha3_keccakf_generic(state);
}

View File

@ -4,7 +4,7 @@
*
* Copyright 2025 Google LLC
*/
#include <asm/neon.h>
#include <asm/simd.h>
#include <linux/cpufeature.h>
@ -24,9 +24,9 @@ static void sha512_blocks(struct sha512_block_state *state,
do {
size_t rem;
kernel_neon_begin();
scoped_ksimd()
rem = __sha512_ce_transform(state, data, nblocks);
kernel_neon_end();
data += (nblocks - rem) * SHA512_BLOCK_SIZE;
nblocks = rem;
} while (nblocks);

View File

@ -8,10 +8,9 @@
#include <linux/raid/pq.h>
#ifdef __KERNEL__
#include <asm/neon.h>
#include <asm/simd.h>
#else
#define kernel_neon_begin()
#define kernel_neon_end()
#define scoped_ksimd()
#define cpu_has_neon() (1)
#endif
@ -32,10 +31,9 @@
{ \
void raid6_neon ## _n ## _gen_syndrome_real(int, \
unsigned long, void**); \
kernel_neon_begin(); \
scoped_ksimd() \
raid6_neon ## _n ## _gen_syndrome_real(disks, \
(unsigned long)bytes, ptrs); \
kernel_neon_end(); \
} \
static void raid6_neon ## _n ## _xor_syndrome(int disks, \
int start, int stop, \
@ -43,10 +41,9 @@
{ \
void raid6_neon ## _n ## _xor_syndrome_real(int, \
int, int, unsigned long, void**); \
kernel_neon_begin(); \
scoped_ksimd() \
raid6_neon ## _n ## _xor_syndrome_real(disks, \
start, stop, (unsigned long)bytes, ptrs); \
kernel_neon_end(); \
start, stop, (unsigned long)bytes, ptrs);\
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \

View File

@ -7,11 +7,10 @@
#include <linux/raid/pq.h>
#ifdef __KERNEL__
#include <asm/neon.h>
#include <asm/simd.h>
#include "neon.h"
#else
#define kernel_neon_begin()
#define kernel_neon_end()
#define scoped_ksimd()
#define cpu_has_neon() (1)
#endif
@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
raid6_gfexp[failb]]];
kernel_neon_begin();
scoped_ksimd()
__raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul);
kernel_neon_end();
}
static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila,
/* Now, pick the proper data tables */
qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
kernel_neon_begin();
scoped_ksimd()
__raid6_datap_recov_neon(bytes, p, q, dq, qmul);
kernel_neon_end();
}
const struct raid6_recov_calls raid6_recov_neon = {