From 4ccd065a69df163cd9fe0dd8e0f609f1eeb4723d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Jun 2025 17:54:41 +0800 Subject: [PATCH 001/116] crypto: ahash - Add support for drivers with no fallback Some drivers cannot have a fallback, e.g., because the key is held in hardware. Allow these to be used with ahash by adding the bit CRYPTO_ALG_NO_FALLBACK. Signed-off-by: Herbert Xu Tested-by: Harald Freudenberger --- crypto/ahash.c | 10 +++++++++- include/linux/crypto.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index e10bc2659ae4..bd9e49950201 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -347,6 +347,9 @@ static int ahash_do_req_chain(struct ahash_request *req, if (crypto_ahash_statesize(tfm) > HASH_MAX_STATESIZE) return -ENOSYS; + if (!crypto_ahash_need_fallback(tfm)) + return -ENOSYS; + { u8 state[HASH_MAX_STATESIZE]; @@ -952,6 +955,10 @@ static int ahash_prepare_alg(struct ahash_alg *alg) base->cra_reqsize > MAX_SYNC_HASH_REQSIZE) return -EINVAL; + if (base->cra_flags & CRYPTO_ALG_NEED_FALLBACK && + base->cra_flags & CRYPTO_ALG_NO_FALLBACK) + return -EINVAL; + err = hash_prepare_alg(&alg->halg); if (err) return err; @@ -960,7 +967,8 @@ static int ahash_prepare_alg(struct ahash_alg *alg) base->cra_flags |= CRYPTO_ALG_TYPE_AHASH; if ((base->cra_flags ^ CRYPTO_ALG_REQ_VIRT) & - (CRYPTO_ALG_ASYNC | CRYPTO_ALG_REQ_VIRT)) + (CRYPTO_ALG_ASYNC | CRYPTO_ALG_REQ_VIRT) && + !(base->cra_flags & CRYPTO_ALG_NO_FALLBACK)) base->cra_flags |= CRYPTO_ALG_NEED_FALLBACK; if (!alg->setkey) diff --git a/include/linux/crypto.h b/include/linux/crypto.h index b50f1954d1bb..a2137e19be7d 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -136,6 +136,9 @@ /* Set if the algorithm supports virtual addresses. */ #define CRYPTO_ALG_REQ_VIRT 0x00040000 +/* Set if the algorithm cannot have a fallback (e.g., phmac). */ +#define CRYPTO_ALG_NO_FALLBACK 0x00080000 + /* The high bits 0xff000000 are reserved for type-specific flags. */ /* From f6192d0d641f42f3f5b8efeb6e7f5f8bdbedf7bf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:03:48 +0800 Subject: [PATCH 002/116] crypto: aspeed/hash - Remove purely software hmac implementation The hmac implementation in aspeed simply duplicates what the new ahash hmac template already does, namely construct ipad and opad by hand and then adding them to the hash before feeding it to the engine. Remove them and just use the generic ahash hmac template. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 335 ----------------------- drivers/crypto/aspeed/aspeed-hace.h | 10 - 2 files changed, 345 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index 0b6e49c06eff..0bcec2d40ed6 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -5,7 +5,6 @@ #include "aspeed-hace.h" #include -#include #include #include #include @@ -338,70 +337,6 @@ static int aspeed_hace_ahash_trigger(struct aspeed_hace_dev *hace_dev, return -EINPROGRESS; } -/* - * HMAC resume aims to do the second pass produces - * the final HMAC code derived from the inner hash - * result and the outer key. 
- */ -static int aspeed_ahash_hmac_resume(struct aspeed_hace_dev *hace_dev) -{ - struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; - struct ahash_request *req = hash_engine->req; - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); - struct aspeed_sha_hmac_ctx *bctx = tctx->base; - int rc = 0; - - AHASH_DBG(hace_dev, "\n"); - - dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, - SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); - - dma_unmap_single(hace_dev->dev, rctx->buffer_dma_addr, - rctx->block_size * 2, DMA_TO_DEVICE); - - /* o key pad + hash sum 1 */ - memcpy(rctx->buffer, bctx->opad, rctx->block_size); - memcpy(rctx->buffer + rctx->block_size, rctx->digest, rctx->digsize); - - rctx->bufcnt = rctx->block_size + rctx->digsize; - rctx->digcnt[0] = rctx->block_size + rctx->digsize; - - aspeed_ahash_fill_padding(hace_dev, rctx); - memcpy(rctx->digest, rctx->sha_iv, rctx->ivsize); - - rctx->digest_dma_addr = dma_map_single(hace_dev->dev, rctx->digest, - SHA512_DIGEST_SIZE, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(hace_dev->dev, rctx->digest_dma_addr)) { - dev_warn(hace_dev->dev, "dma_map() rctx digest error\n"); - rc = -ENOMEM; - goto end; - } - - rctx->buffer_dma_addr = dma_map_single(hace_dev->dev, rctx->buffer, - rctx->block_size * 2, - DMA_TO_DEVICE); - if (dma_mapping_error(hace_dev->dev, rctx->buffer_dma_addr)) { - dev_warn(hace_dev->dev, "dma_map() rctx buffer error\n"); - rc = -ENOMEM; - goto free_rctx_digest; - } - - hash_engine->src_dma = rctx->buffer_dma_addr; - hash_engine->src_length = rctx->bufcnt; - hash_engine->digest_dma = rctx->digest_dma_addr; - - return aspeed_hace_ahash_trigger(hace_dev, aspeed_ahash_transfer); - -free_rctx_digest: - dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, - SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); -end: - return rc; -} - static int aspeed_ahash_req_final(struct aspeed_hace_dev *hace_dev) { struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; @@ -437,10 +372,6 @@ static int aspeed_ahash_req_final(struct aspeed_hace_dev *hace_dev) hash_engine->src_length = rctx->bufcnt; hash_engine->digest_dma = rctx->digest_dma_addr; - if (rctx->flags & SHA_FLAGS_HMAC) - return aspeed_hace_ahash_trigger(hace_dev, - aspeed_ahash_hmac_resume); - return aspeed_hace_ahash_trigger(hace_dev, aspeed_ahash_transfer); free_rctx_digest: @@ -609,16 +540,6 @@ static int aspeed_sham_update(struct ahash_request *req) return aspeed_hace_hash_handle_queue(hace_dev, req); } -static int aspeed_sham_shash_digest(struct crypto_shash *tfm, u32 flags, - const u8 *data, unsigned int len, u8 *out) -{ - SHASH_DESC_ON_STACK(shash, tfm); - - shash->tfm = tfm; - - return crypto_shash_digest(shash, data, len, out); -} - static int aspeed_sham_final(struct ahash_request *req) { struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); @@ -664,7 +585,6 @@ static int aspeed_sham_init(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); struct aspeed_hace_dev *hace_dev = tctx->hace_dev; - struct aspeed_sha_hmac_ctx *bctx = tctx->base; AHASH_DBG(hace_dev, "%s: digest size:%d\n", crypto_tfm_alg_name(&tfm->base), @@ -732,14 +652,6 @@ static int aspeed_sham_init(struct ahash_request *req) rctx->digcnt[0] = 0; rctx->digcnt[1] = 0; - /* HMAC init */ - if (tctx->flags & SHA_FLAGS_HMAC) { - rctx->digcnt[0] = rctx->block_size; - rctx->bufcnt = rctx->block_size; - 
memcpy(rctx->buffer, bctx->ipad, rctx->block_size); - rctx->flags |= SHA_FLAGS_HMAC; - } - return 0; } @@ -748,43 +660,6 @@ static int aspeed_sham_digest(struct ahash_request *req) return aspeed_sham_init(req) ? : aspeed_sham_finup(req); } -static int aspeed_sham_setkey(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen) -{ - struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); - struct aspeed_hace_dev *hace_dev = tctx->hace_dev; - struct aspeed_sha_hmac_ctx *bctx = tctx->base; - int ds = crypto_shash_digestsize(bctx->shash); - int bs = crypto_shash_blocksize(bctx->shash); - int err = 0; - int i; - - AHASH_DBG(hace_dev, "%s: keylen:%d\n", crypto_tfm_alg_name(&tfm->base), - keylen); - - if (keylen > bs) { - err = aspeed_sham_shash_digest(bctx->shash, - crypto_shash_get_flags(bctx->shash), - key, keylen, bctx->ipad); - if (err) - return err; - keylen = ds; - - } else { - memcpy(bctx->ipad, key, keylen); - } - - memset(bctx->ipad + keylen, 0, bs - keylen); - memcpy(bctx->opad, bctx->ipad, bs); - - for (i = 0; i < bs; i++) { - bctx->ipad[i] ^= HMAC_IPAD_VALUE; - bctx->opad[i] ^= HMAC_OPAD_VALUE; - } - - return err; -} - static int aspeed_sham_cra_init(struct crypto_tfm *tfm) { struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); @@ -793,43 +668,13 @@ static int aspeed_sham_cra_init(struct crypto_tfm *tfm) ast_alg = container_of(alg, struct aspeed_hace_alg, alg.ahash.base); tctx->hace_dev = ast_alg->hace_dev; - tctx->flags = 0; crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct aspeed_sham_reqctx)); - if (ast_alg->alg_base) { - /* hmac related */ - struct aspeed_sha_hmac_ctx *bctx = tctx->base; - - tctx->flags |= SHA_FLAGS_HMAC; - bctx->shash = crypto_alloc_shash(ast_alg->alg_base, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(bctx->shash)) { - dev_warn(ast_alg->hace_dev->dev, - "base driver '%s' could not be loaded.\n", - ast_alg->alg_base); - return PTR_ERR(bctx->shash); - } - } - return 0; } -static void aspeed_sham_cra_exit(struct crypto_tfm *tfm) -{ - struct aspeed_sham_ctx *tctx = crypto_tfm_ctx(tfm); - struct aspeed_hace_dev *hace_dev = tctx->hace_dev; - - AHASH_DBG(hace_dev, "%s\n", crypto_tfm_alg_name(tfm)); - - if (tctx->flags & SHA_FLAGS_HMAC) { - struct aspeed_sha_hmac_ctx *bctx = tctx->base; - - crypto_free_shash(bctx->shash); - } -} - static int aspeed_sham_export(struct ahash_request *req, void *out) { struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); @@ -873,7 +718,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, } } }, @@ -905,7 +749,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, } } }, @@ -937,112 +780,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, - } - } - }, - .alg.ahash.op = { - .do_one_request = aspeed_ahash_do_one, - }, - }, - { - .alg_base = "sha1", - .alg.ahash.base = { - .init = aspeed_sham_init, - .update = aspeed_sham_update, - .final = aspeed_sham_final, - .finup = aspeed_sham_finup, - .digest = aspeed_sham_digest, - .setkey = aspeed_sham_setkey, - .export = aspeed_sham_export, - .import = aspeed_sham_import, - .halg = { - .digestsize = SHA1_DIGEST_SIZE, - .statesize = sizeof(struct aspeed_sham_reqctx), - .base = { - .cra_name = "hmac(sha1)", - 
.cra_driver_name = "aspeed-hmac-sha1", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aspeed_sham_ctx) + - sizeof(struct aspeed_sha_hmac_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, - } - } - }, - .alg.ahash.op = { - .do_one_request = aspeed_ahash_do_one, - }, - }, - { - .alg_base = "sha224", - .alg.ahash.base = { - .init = aspeed_sham_init, - .update = aspeed_sham_update, - .final = aspeed_sham_final, - .finup = aspeed_sham_finup, - .digest = aspeed_sham_digest, - .setkey = aspeed_sham_setkey, - .export = aspeed_sham_export, - .import = aspeed_sham_import, - .halg = { - .digestsize = SHA224_DIGEST_SIZE, - .statesize = sizeof(struct aspeed_sham_reqctx), - .base = { - .cra_name = "hmac(sha224)", - .cra_driver_name = "aspeed-hmac-sha224", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = SHA224_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aspeed_sham_ctx) + - sizeof(struct aspeed_sha_hmac_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, - } - } - }, - .alg.ahash.op = { - .do_one_request = aspeed_ahash_do_one, - }, - }, - { - .alg_base = "sha256", - .alg.ahash.base = { - .init = aspeed_sham_init, - .update = aspeed_sham_update, - .final = aspeed_sham_final, - .finup = aspeed_sham_finup, - .digest = aspeed_sham_digest, - .setkey = aspeed_sham_setkey, - .export = aspeed_sham_export, - .import = aspeed_sham_import, - .halg = { - .digestsize = SHA256_DIGEST_SIZE, - .statesize = sizeof(struct aspeed_sham_reqctx), - .base = { - .cra_name = "hmac(sha256)", - .cra_driver_name = "aspeed-hmac-sha256", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = SHA256_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aspeed_sham_ctx) + - sizeof(struct aspeed_sha_hmac_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, } } }, @@ -1077,7 +814,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, } } }, @@ -1109,77 +845,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, - } - } - }, - .alg.ahash.op = { - .do_one_request = aspeed_ahash_do_one, - }, - }, - { - .alg_base = "sha384", - .alg.ahash.base = { - .init = aspeed_sham_init, - .update = aspeed_sham_update, - .final = aspeed_sham_final, - .finup = aspeed_sham_finup, - .digest = aspeed_sham_digest, - .setkey = aspeed_sham_setkey, - .export = aspeed_sham_export, - .import = aspeed_sham_import, - .halg = { - .digestsize = SHA384_DIGEST_SIZE, - .statesize = sizeof(struct aspeed_sham_reqctx), - .base = { - .cra_name = "hmac(sha384)", - .cra_driver_name = "aspeed-hmac-sha384", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = SHA384_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aspeed_sham_ctx) + - sizeof(struct aspeed_sha_hmac_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, - 
.cra_exit = aspeed_sham_cra_exit, - } - } - }, - .alg.ahash.op = { - .do_one_request = aspeed_ahash_do_one, - }, - }, - { - .alg_base = "sha512", - .alg.ahash.base = { - .init = aspeed_sham_init, - .update = aspeed_sham_update, - .final = aspeed_sham_final, - .finup = aspeed_sham_finup, - .digest = aspeed_sham_digest, - .setkey = aspeed_sham_setkey, - .export = aspeed_sham_export, - .import = aspeed_sham_import, - .halg = { - .digestsize = SHA512_DIGEST_SIZE, - .statesize = sizeof(struct aspeed_sham_reqctx), - .base = { - .cra_name = "hmac(sha512)", - .cra_driver_name = "aspeed-hmac-sha512", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_KERN_DRIVER_ONLY, - .cra_blocksize = SHA512_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aspeed_sham_ctx) + - sizeof(struct aspeed_sha_hmac_ctx), - .cra_alignmask = 0, - .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, - .cra_exit = aspeed_sham_cra_exit, } } }, diff --git a/drivers/crypto/aspeed/aspeed-hace.h b/drivers/crypto/aspeed/aspeed-hace.h index 68f70e01fccb..7ff1798bc198 100644 --- a/drivers/crypto/aspeed/aspeed-hace.h +++ b/drivers/crypto/aspeed/aspeed-hace.h @@ -119,7 +119,6 @@ #define SHA_FLAGS_SHA512 BIT(4) #define SHA_FLAGS_SHA512_224 BIT(5) #define SHA_FLAGS_SHA512_256 BIT(6) -#define SHA_FLAGS_HMAC BIT(8) #define SHA_FLAGS_FINUP BIT(9) #define SHA_FLAGS_MASK (0xff) @@ -161,17 +160,8 @@ struct aspeed_engine_hash { aspeed_hace_fn_t dma_prepare; }; -struct aspeed_sha_hmac_ctx { - struct crypto_shash *shash; - u8 ipad[SHA512_BLOCK_SIZE]; - u8 opad[SHA512_BLOCK_SIZE]; -}; - struct aspeed_sham_ctx { struct aspeed_hace_dev *hace_dev; - unsigned long flags; /* hmac flag */ - - struct aspeed_sha_hmac_ctx base[]; }; struct aspeed_sham_reqctx { From 7938eb10436c792bfb2d80433015347b571446d6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:03:50 +0800 Subject: [PATCH 003/116] crypto: aspeed/hash - Reorganise struct aspeed_sham_reqctx Move the from-device DMA buffer to the front of the structure. Sort the rest by size and alignment. Keep the partial block buffer at the end. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace.h b/drivers/crypto/aspeed/aspeed-hace.h index 7ff1798bc198..a34677f10966 100644 --- a/drivers/crypto/aspeed/aspeed-hace.h +++ b/drivers/crypto/aspeed/aspeed-hace.h @@ -165,6 +165,12 @@ struct aspeed_sham_ctx { }; struct aspeed_sham_reqctx { + /* DMA buffer written by hardware */ + u8 digest[SHA512_DIGEST_SIZE] __aligned(64); + + /* Software state sorted by size. */ + u64 digcnt[2]; + unsigned long flags; /* final update flag should no use*/ unsigned long op; /* final or update */ u32 cmd; /* trigger cmd */ @@ -181,14 +187,13 @@ struct aspeed_sham_reqctx { const __be32 *sha_iv; /* remain data buffer */ - u8 buffer[SHA512_BLOCK_SIZE * 2]; dma_addr_t buffer_dma_addr; size_t bufcnt; /* buffer counter */ - /* output buffer */ - u8 digest[SHA512_DIGEST_SIZE] __aligned(64); dma_addr_t digest_dma_addr; - u64 digcnt[2]; + + /* This is DMA too but read-only for hardware. */ + u8 buffer[SHA512_BLOCK_SIZE * 2]; }; struct aspeed_engine_crypto { From f91fd0d97990b22c5cebe9443ee04286ee44c205 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:03:52 +0800 Subject: [PATCH 004/116] crypto: aspeed/hash - Use init_tfm instead of cra_init Use the init_tfm interface instead of cra_init. Also get rid of the dynamic reqsize. 
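In short, the conversion looks like this (an illustrative before/after sketch, not part of the diff itself):

    /* before: legacy crypto_tfm hook plus a dynamic reqsize */
    .cra_init = aspeed_sham_cra_init,    /* takes struct crypto_tfm * */
    /* ... with crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), ...)
     * called from the init function at runtime */

    /* after: ahash-native hook plus a static reqsize */
    .init_tfm    = aspeed_sham_cra_init, /* takes struct crypto_ahash * */
    .cra_reqsize = sizeof(struct aspeed_sham_reqctx),

With the ahash-level hook the __crypto_ahash_cast() dance disappears, since the init function is handed a struct crypto_ahash directly.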
Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index 0bcec2d40ed6..4a479a204331 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -660,18 +660,15 @@ static int aspeed_sham_digest(struct ahash_request *req) return aspeed_sham_init(req) ? : aspeed_sham_finup(req); } -static int aspeed_sham_cra_init(struct crypto_tfm *tfm) +static int aspeed_sham_cra_init(struct crypto_ahash *tfm) { - struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); - struct aspeed_sham_ctx *tctx = crypto_tfm_ctx(tfm); + struct ahash_alg *alg = crypto_ahash_alg(tfm); + struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); struct aspeed_hace_alg *ast_alg; ast_alg = container_of(alg, struct aspeed_hace_alg, alg.ahash.base); tctx->hace_dev = ast_alg->hace_dev; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct aspeed_sham_reqctx)); - return 0; } @@ -703,6 +700,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .digest = aspeed_sham_digest, .export = aspeed_sham_export, .import = aspeed_sham_import, + .init_tfm = aspeed_sham_cra_init, .halg = { .digestsize = SHA1_DIGEST_SIZE, .statesize = sizeof(struct aspeed_sham_reqctx), @@ -715,9 +713,9 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), + .cra_reqsize = sizeof(struct aspeed_sham_reqctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, } } }, @@ -734,6 +732,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .digest = aspeed_sham_digest, .export = aspeed_sham_export, .import = aspeed_sham_import, + .init_tfm = aspeed_sham_cra_init, .halg = { .digestsize = SHA256_DIGEST_SIZE, .statesize = sizeof(struct aspeed_sham_reqctx), @@ -746,9 +745,9 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), + .cra_reqsize = sizeof(struct aspeed_sham_reqctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, } } }, @@ -765,6 +764,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .digest = aspeed_sham_digest, .export = aspeed_sham_export, .import = aspeed_sham_import, + .init_tfm = aspeed_sham_cra_init, .halg = { .digestsize = SHA224_DIGEST_SIZE, .statesize = sizeof(struct aspeed_sham_reqctx), @@ -777,9 +777,9 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), + .cra_reqsize = sizeof(struct aspeed_sham_reqctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, } } }, @@ -799,6 +799,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .digest = aspeed_sham_digest, .export = aspeed_sham_export, .import = aspeed_sham_import, + .init_tfm = aspeed_sham_cra_init, .halg = { .digestsize = SHA384_DIGEST_SIZE, .statesize = sizeof(struct aspeed_sham_reqctx), @@ -811,9 +812,9 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), + .cra_reqsize = sizeof(struct aspeed_sham_reqctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_init = 
aspeed_sham_cra_init, } } }, @@ -830,6 +831,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .digest = aspeed_sham_digest, .export = aspeed_sham_export, .import = aspeed_sham_import, + .init_tfm = aspeed_sham_cra_init, .halg = { .digestsize = SHA512_DIGEST_SIZE, .statesize = sizeof(struct aspeed_sham_reqctx), @@ -842,9 +844,9 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), + .cra_reqsize = sizeof(struct aspeed_sham_reqctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, - .cra_init = aspeed_sham_cra_init, } } }, From 43ddeca4ff5418f858ea7457bcb75274d6711870 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:03:54 +0800 Subject: [PATCH 005/116] crypto: aspeed/hash - Provide rctx->buffer as argument to fill padding Instead of always writing the padding to rctx->buffer, make it an argument. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 27 ++++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index 4a479a204331..9f776ec8f5ec 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -73,10 +73,10 @@ static const __be64 sha512_iv[8] = { * - if message length < 112 bytes then padlen = 112 - message length * - else padlen = 128 + 112 - message length */ -static void aspeed_ahash_fill_padding(struct aspeed_hace_dev *hace_dev, - struct aspeed_sham_reqctx *rctx) +static int aspeed_ahash_fill_padding(struct aspeed_hace_dev *hace_dev, + struct aspeed_sham_reqctx *rctx, u8 *buf) { - unsigned int index, padlen; + unsigned int index, padlen, bitslen; __be64 bits[2]; AHASH_DBG(hace_dev, "rctx flags:0x%x\n", (u32)rctx->flags); @@ -86,25 +86,23 @@ static void aspeed_ahash_fill_padding(struct aspeed_hace_dev *hace_dev, case SHA_FLAGS_SHA224: case SHA_FLAGS_SHA256: bits[0] = cpu_to_be64(rctx->digcnt[0] << 3); - index = rctx->bufcnt & 0x3f; + index = rctx->digcnt[0] & 0x3f; padlen = (index < 56) ? (56 - index) : ((64 + 56) - index); - *(rctx->buffer + rctx->bufcnt) = 0x80; - memset(rctx->buffer + rctx->bufcnt + 1, 0, padlen - 1); - memcpy(rctx->buffer + rctx->bufcnt + padlen, bits, 8); - rctx->bufcnt += padlen + 8; + bitslen = 8; break; default: bits[1] = cpu_to_be64(rctx->digcnt[0] << 3); bits[0] = cpu_to_be64(rctx->digcnt[1] << 3 | rctx->digcnt[0] >> 61); - index = rctx->bufcnt & 0x7f; + index = rctx->digcnt[0] & 0x7f; padlen = (index < 112) ? 
(112 - index) : ((128 + 112) - index); - *(rctx->buffer + rctx->bufcnt) = 0x80; - memset(rctx->buffer + rctx->bufcnt + 1, 0, padlen - 1); - memcpy(rctx->buffer + rctx->bufcnt + padlen, bits, 16); - rctx->bufcnt += padlen + 16; + bitslen = 16; break; } + buf[0] = 0x80; + memset(buf + 1, 0, padlen - 1); + memcpy(buf + padlen, bits, bitslen); + return padlen + bitslen; } /* @@ -346,7 +344,8 @@ static int aspeed_ahash_req_final(struct aspeed_hace_dev *hace_dev) AHASH_DBG(hace_dev, "\n"); - aspeed_ahash_fill_padding(hace_dev, rctx); + rctx->bufcnt += aspeed_ahash_fill_padding(hace_dev, rctx, + rctx->buffer + rctx->bufcnt); rctx->digest_dma_addr = dma_map_single(hace_dev->dev, rctx->digest, From 879203defb9216dd4343c3bf995c34321232a5cd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:03:57 +0800 Subject: [PATCH 006/116] crypto: aspeed/hash - Move sham_final call into sham_update The only time sham_final needs to be called in sham_finup is when the finup request fits into the partial block. Move this special handling into sham_update. The comment about releasing resources is nonsense. The Crypto API does not mandate the use of final, so the user could always go away after an update and never come back. Therefore the driver must not hold any resources after an update call. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 44 +++++++++--------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index 9f776ec8f5ec..40363159489e 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -508,6 +508,20 @@ static int aspeed_ahash_do_one(struct crypto_engine *engine, void *areq) return aspeed_ahash_do_request(engine, areq); } +static int aspeed_sham_final(struct ahash_request *req) +{ + struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); + struct aspeed_hace_dev *hace_dev = tctx->hace_dev; + + AHASH_DBG(hace_dev, "req->nbytes:%d, rctx->total:%d\n", + req->nbytes, rctx->total); + rctx->op = SHA_OP_FINAL; + + return aspeed_hace_hash_handle_queue(hace_dev, req); +} + static int aspeed_sham_update(struct ahash_request *req) { struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); @@ -533,49 +547,25 @@ static int aspeed_sham_update(struct ahash_request *req) rctx->total, 0); rctx->bufcnt += rctx->total; - return 0; + return rctx->flags & SHA_FLAGS_FINUP ?
+ aspeed_sham_final(req) : 0; } return aspeed_hace_hash_handle_queue(hace_dev, req); } -static int aspeed_sham_final(struct ahash_request *req) -{ - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); - struct aspeed_hace_dev *hace_dev = tctx->hace_dev; - - AHASH_DBG(hace_dev, "req->nbytes:%d, rctx->total:%d\n", - req->nbytes, rctx->total); - rctx->op = SHA_OP_FINAL; - - return aspeed_hace_hash_handle_queue(hace_dev, req); -} - static int aspeed_sham_finup(struct ahash_request *req) { struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); struct aspeed_hace_dev *hace_dev = tctx->hace_dev; - int rc1, rc2; AHASH_DBG(hace_dev, "req->nbytes: %d\n", req->nbytes); rctx->flags |= SHA_FLAGS_FINUP; - rc1 = aspeed_sham_update(req); - if (rc1 == -EINPROGRESS || rc1 == -EBUSY) - return rc1; - - /* - * final() has to be always called to cleanup resources - * even if update() failed, except EINPROGRESS - */ - rc2 = aspeed_sham_final(req); - - return rc1 ? : rc2; + return aspeed_sham_update(req); } static int aspeed_sham_init(struct ahash_request *req) From 278d737cc2e0044a93d1dabbdcf293a8e65d981d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:03:59 +0800 Subject: [PATCH 007/116] crypto: aspeed/hash - Move final padding into dma_prepare Rather than processing a final as two separate updates, combine them into one for the linear dma_prepare case. This means that the total hash size is slightly reduced, but that will be fixed up later by repeating the hash if necessary. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 29 ++++++++++++++---------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index 40363159489e..ceea2e2f5658 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -114,29 +114,34 @@ static int aspeed_ahash_dma_prepare(struct aspeed_hace_dev *hace_dev) struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; struct ahash_request *req = hash_engine->req; struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - int length, remain; + bool final = rctx->flags & SHA_FLAGS_FINUP; + unsigned int length, remain; length = rctx->total + rctx->bufcnt; - remain = length % rctx->block_size; + remain = final ? 0 : length % rctx->block_size; AHASH_DBG(hace_dev, "length:0x%x, remain:0x%x\n", length, remain); if (rctx->bufcnt) memcpy(hash_engine->ahash_src_addr, rctx->buffer, rctx->bufcnt); - if (rctx->total + rctx->bufcnt < ASPEED_CRYPTO_SRC_DMA_BUF_LEN) { - scatterwalk_map_and_copy(hash_engine->ahash_src_addr + - rctx->bufcnt, rctx->src_sg, - rctx->offset, rctx->total - remain, 0); - rctx->offset += rctx->total - remain; - - } else { + if ((final ? 
round_up(length, rctx->block_size) + rctx->block_size : + length) > ASPEED_CRYPTO_SRC_DMA_BUF_LEN) { dev_warn(hace_dev->dev, "Hash data length is too large\n"); return -EINVAL; } - scatterwalk_map_and_copy(rctx->buffer, rctx->src_sg, - rctx->offset, remain, 0); + scatterwalk_map_and_copy(hash_engine->ahash_src_addr + + rctx->bufcnt, rctx->src_sg, + rctx->offset, rctx->total - remain, 0); + rctx->offset += rctx->total - remain; + + if (final) + length += aspeed_ahash_fill_padding( + hace_dev, rctx, hash_engine->ahash_src_addr + length); + else + scatterwalk_map_and_copy(rctx->buffer, rctx->src_sg, + rctx->offset, remain, 0); rctx->bufcnt = remain; rctx->digest_dma_addr = dma_map_single(hace_dev->dev, rctx->digest, @@ -423,7 +428,7 @@ static int aspeed_ahash_update_resume(struct aspeed_hace_dev *hace_dev) SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); if (rctx->flags & SHA_FLAGS_FINUP) - return aspeed_ahash_req_final(hace_dev); + memcpy(req->result, rctx->digest, rctx->digsize); return aspeed_ahash_complete(hace_dev); } From 72c50eb4f54c2b1412e2a79ec273fa28a449dc8f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:04:01 +0800 Subject: [PATCH 008/116] crypto: aspeed/hash - Remove sha_iv Removed unused sha_iv field from request context. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 5 ----- drivers/crypto/aspeed/aspeed-hace.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index ceea2e2f5658..d31d7e1c9af2 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -593,7 +593,6 @@ static int aspeed_sham_init(struct ahash_request *req) rctx->flags |= SHA_FLAGS_SHA1; rctx->digsize = SHA1_DIGEST_SIZE; rctx->block_size = SHA1_BLOCK_SIZE; - rctx->sha_iv = sha1_iv; rctx->ivsize = 32; memcpy(rctx->digest, sha1_iv, rctx->ivsize); break; @@ -602,7 +601,6 @@ static int aspeed_sham_init(struct ahash_request *req) rctx->flags |= SHA_FLAGS_SHA224; rctx->digsize = SHA224_DIGEST_SIZE; rctx->block_size = SHA224_BLOCK_SIZE; - rctx->sha_iv = sha224_iv; rctx->ivsize = 32; memcpy(rctx->digest, sha224_iv, rctx->ivsize); break; @@ -611,7 +609,6 @@ static int aspeed_sham_init(struct ahash_request *req) rctx->flags |= SHA_FLAGS_SHA256; rctx->digsize = SHA256_DIGEST_SIZE; rctx->block_size = SHA256_BLOCK_SIZE; - rctx->sha_iv = sha256_iv; rctx->ivsize = 32; memcpy(rctx->digest, sha256_iv, rctx->ivsize); break; @@ -621,7 +618,6 @@ static int aspeed_sham_init(struct ahash_request *req) rctx->flags |= SHA_FLAGS_SHA384; rctx->digsize = SHA384_DIGEST_SIZE; rctx->block_size = SHA384_BLOCK_SIZE; - rctx->sha_iv = (const __be32 *)sha384_iv; rctx->ivsize = 64; memcpy(rctx->digest, sha384_iv, rctx->ivsize); break; @@ -631,7 +627,6 @@ static int aspeed_sham_init(struct ahash_request *req) rctx->flags |= SHA_FLAGS_SHA512; rctx->digsize = SHA512_DIGEST_SIZE; rctx->block_size = SHA512_BLOCK_SIZE; - rctx->sha_iv = (const __be32 *)sha512_iv; rctx->ivsize = 64; memcpy(rctx->digest, sha512_iv, rctx->ivsize); break; diff --git a/drivers/crypto/aspeed/aspeed-hace.h b/drivers/crypto/aspeed/aspeed-hace.h index a34677f10966..ad39954251dd 100644 --- a/drivers/crypto/aspeed/aspeed-hace.h +++ b/drivers/crypto/aspeed/aspeed-hace.h @@ -184,7 +184,6 @@ struct aspeed_sham_reqctx { size_t digsize; size_t block_size; size_t ivsize; - const __be32 *sha_iv; /* remain data buffer */ dma_addr_t buffer_dma_addr; From 5f38ebefc3703477a87c128d1a251ab6c9f4fbf8 Mon Sep 17 00:00:00 2001 From: 
Herbert Xu Date: Tue, 13 May 2025 14:04:04 +0800 Subject: [PATCH 009/116] crypto: aspeed/hash - Use API partial block handling Use the Crypto API partial block handling. Also switch to the generic export format. Remove final function that is no longer used by the Crypto API. Move final padding into aspeed_ahash_dma_prepare_sg. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 304 ++++++++--------------- drivers/crypto/aspeed/aspeed-hace.h | 6 +- 2 files changed, 104 insertions(+), 206 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index d31d7e1c9af2..d7c7f867d6e1 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #ifdef CONFIG_CRYPTO_DEV_ASPEED_DEBUG @@ -58,6 +59,46 @@ static const __be64 sha512_iv[8] = { cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7) }; +static int aspeed_sham_init(struct ahash_request *req); +static int aspeed_ahash_req_update(struct aspeed_hace_dev *hace_dev); + +static int aspeed_sham_export(struct ahash_request *req, void *out) +{ + struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + + memcpy(out, rctx->digest, rctx->ivsize); + p.u8 += rctx->ivsize; + put_unaligned(rctx->digcnt[0], p.u64++); + if (rctx->ivsize == 64) + put_unaligned(rctx->digcnt[1], p.u64); + return 0; +} + +static int aspeed_sham_import(struct ahash_request *req, const void *in) +{ + struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int err; + + err = aspeed_sham_init(req); + if (err) + return err; + + memcpy(rctx->digest, in, rctx->ivsize); + p.u8 += rctx->ivsize; + rctx->digcnt[0] = get_unaligned(p.u64++); + if (rctx->ivsize == 64) + rctx->digcnt[1] = get_unaligned(p.u64); + return 0; +} + /* The purpose of this padding is to ensure that the padded message is a * multiple of 512 bits (SHA1/SHA224/SHA256) or 1024 bits (SHA384/SHA512). * The bit "1" is appended at the end of the message followed by @@ -117,33 +158,29 @@ static int aspeed_ahash_dma_prepare(struct aspeed_hace_dev *hace_dev) bool final = rctx->flags & SHA_FLAGS_FINUP; unsigned int length, remain; - length = rctx->total + rctx->bufcnt; + length = rctx->total; remain = final ? 0 : length % rctx->block_size; AHASH_DBG(hace_dev, "length:0x%x, remain:0x%x\n", length, remain); - if (rctx->bufcnt) - memcpy(hash_engine->ahash_src_addr, rctx->buffer, rctx->bufcnt); - if ((final ? 
round_up(length, rctx->block_size) + rctx->block_size : length) > ASPEED_CRYPTO_SRC_DMA_BUF_LEN) { dev_warn(hace_dev->dev, "Hash data length is too large\n"); return -EINVAL; } - scatterwalk_map_and_copy(hash_engine->ahash_src_addr + - rctx->bufcnt, rctx->src_sg, + scatterwalk_map_and_copy(hash_engine->ahash_src_addr, rctx->src_sg, rctx->offset, rctx->total - remain, 0); rctx->offset += rctx->total - remain; + rctx->digcnt[0] += rctx->total - remain; + if (rctx->digcnt[0] < rctx->total - remain) + rctx->digcnt[1]++; + if (final) length += aspeed_ahash_fill_padding( hace_dev, rctx, hash_engine->ahash_src_addr + length); - else - scatterwalk_map_and_copy(rctx->buffer, rctx->src_sg, - rctx->offset, remain, 0); - rctx->bufcnt = remain; rctx->digest_dma_addr = dma_map_single(hace_dev->dev, rctx->digest, SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); @@ -168,16 +205,17 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; struct ahash_request *req = hash_engine->req; struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); + bool final = rctx->flags & SHA_FLAGS_FINUP; struct aspeed_sg_list *src_list; struct scatterlist *s; int length, remain, sg_len, i; int rc = 0; - remain = (rctx->total + rctx->bufcnt) % rctx->block_size; - length = rctx->total + rctx->bufcnt - remain; + remain = final ? 0 : rctx->total % rctx->block_size; + length = rctx->total - remain; - AHASH_DBG(hace_dev, "%s:0x%x, %s:%zu, %s:0x%x, %s:0x%x\n", - "rctx total", rctx->total, "bufcnt", rctx->bufcnt, + AHASH_DBG(hace_dev, "%s:0x%x, %s:0x%x, %s:0x%x\n", + "rctx total", rctx->total, "length", length, "remain", remain); sg_len = dma_map_sg(hace_dev->dev, rctx->src_sg, rctx->src_nents, @@ -198,13 +236,39 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) goto free_src_sg; } - if (rctx->bufcnt != 0) { - u32 phy_addr; - u32 len; + for_each_sg(rctx->src_sg, s, sg_len, i) { + u32 phy_addr = sg_dma_address(s); + u32 len = sg_dma_len(s); + if (length > len) + length -= len; + else { + /* Last sg list */ + len = length; + length = 0; + } + + src_list[i].phy_addr = cpu_to_le32(phy_addr); + src_list[i].len = cpu_to_le32(len); + } + + if (length != 0) { + rc = -EINVAL; + goto free_rctx_digest; + } + + rctx->digcnt[0] += rctx->total - remain; + if (rctx->digcnt[0] < rctx->total - remain) + rctx->digcnt[1]++; + + if (final) { + int len = aspeed_ahash_fill_padding(hace_dev, rctx, + rctx->buffer); + + rctx->total += len; rctx->buffer_dma_addr = dma_map_single(hace_dev->dev, rctx->buffer, - rctx->block_size * 2, + sizeof(rctx->buffer), DMA_TO_DEVICE); if (dma_mapping_error(hace_dev->dev, rctx->buffer_dma_addr)) { dev_warn(hace_dev->dev, "dma_map() rctx buffer error\n"); @@ -212,54 +276,19 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) goto free_rctx_digest; } - phy_addr = rctx->buffer_dma_addr; - len = rctx->bufcnt; - length -= len; - - /* Last sg list */ - if (length == 0) - len |= HASH_SG_LAST_LIST; - - src_list[0].phy_addr = cpu_to_le32(phy_addr); - src_list[0].len = cpu_to_le32(len); - src_list++; - } - - if (length != 0) { - for_each_sg(rctx->src_sg, s, sg_len, i) { - u32 phy_addr = sg_dma_address(s); - u32 len = sg_dma_len(s); - - if (length > len) - length -= len; - else { - /* Last sg list */ - len = length; - len |= HASH_SG_LAST_LIST; - length = 0; - } - - src_list[i].phy_addr = cpu_to_le32(phy_addr); - src_list[i].len = cpu_to_le32(len); - } - } - - if (length != 0) { - rc = -EINVAL; - goto 
free_rctx_buffer; + src_list[i].phy_addr = cpu_to_le32(rctx->buffer_dma_addr); + src_list[i].len = cpu_to_le32(len); + i++; } + src_list[i - 1].len |= cpu_to_le32(HASH_SG_LAST_LIST); rctx->offset = rctx->total - remain; - hash_engine->src_length = rctx->total + rctx->bufcnt - remain; + hash_engine->src_length = rctx->total - remain; hash_engine->src_dma = hash_engine->ahash_src_dma_addr; hash_engine->digest_dma = rctx->digest_dma_addr; return 0; -free_rctx_buffer: - if (rctx->bufcnt != 0) - dma_unmap_single(hace_dev->dev, rctx->buffer_dma_addr, - rctx->block_size * 2, DMA_TO_DEVICE); free_rctx_digest: dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); @@ -274,39 +303,21 @@ static int aspeed_ahash_complete(struct aspeed_hace_dev *hace_dev) { struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; struct ahash_request *req = hash_engine->req; + struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); AHASH_DBG(hace_dev, "\n"); hash_engine->flags &= ~CRYPTO_FLAGS_BUSY; - crypto_finalize_hash_request(hace_dev->crypt_engine_hash, req, 0); + if (rctx->flags & SHA_FLAGS_FINUP) + memcpy(req->result, rctx->digest, rctx->digsize); + + crypto_finalize_hash_request(hace_dev->crypt_engine_hash, req, + rctx->total - rctx->offset); return 0; } -/* - * Copy digest to the corresponding request result. - * This function will be called at final() stage. - */ -static int aspeed_ahash_transfer(struct aspeed_hace_dev *hace_dev) -{ - struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; - struct ahash_request *req = hash_engine->req; - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - - AHASH_DBG(hace_dev, "\n"); - - dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, - SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); - - dma_unmap_single(hace_dev->dev, rctx->buffer_dma_addr, - rctx->block_size * 2, DMA_TO_DEVICE); - - memcpy(req->result, rctx->digest, rctx->digsize); - - return aspeed_ahash_complete(hace_dev); -} - /* * Trigger hardware engines to do the math. 
*/ @@ -340,51 +351,6 @@ static int aspeed_hace_ahash_trigger(struct aspeed_hace_dev *hace_dev, return -EINPROGRESS; } -static int aspeed_ahash_req_final(struct aspeed_hace_dev *hace_dev) -{ - struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; - struct ahash_request *req = hash_engine->req; - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - int rc = 0; - - AHASH_DBG(hace_dev, "\n"); - - rctx->bufcnt += aspeed_ahash_fill_padding(hace_dev, rctx, - rctx->buffer + rctx->bufcnt); - - rctx->digest_dma_addr = dma_map_single(hace_dev->dev, - rctx->digest, - SHA512_DIGEST_SIZE, - DMA_BIDIRECTIONAL); - if (dma_mapping_error(hace_dev->dev, rctx->digest_dma_addr)) { - dev_warn(hace_dev->dev, "dma_map() rctx digest error\n"); - rc = -ENOMEM; - goto end; - } - - rctx->buffer_dma_addr = dma_map_single(hace_dev->dev, - rctx->buffer, - rctx->block_size * 2, - DMA_TO_DEVICE); - if (dma_mapping_error(hace_dev->dev, rctx->buffer_dma_addr)) { - dev_warn(hace_dev->dev, "dma_map() rctx buffer error\n"); - rc = -ENOMEM; - goto free_rctx_digest; - } - - hash_engine->src_dma = rctx->buffer_dma_addr; - hash_engine->src_length = rctx->bufcnt; - hash_engine->digest_dma = rctx->digest_dma_addr; - - return aspeed_hace_ahash_trigger(hace_dev, aspeed_ahash_transfer); - -free_rctx_digest: - dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, - SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); -end: - return rc; -} - static int aspeed_ahash_update_resume_sg(struct aspeed_hace_dev *hace_dev) { struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; @@ -396,23 +362,15 @@ static int aspeed_ahash_update_resume_sg(struct aspeed_hace_dev *hace_dev) dma_unmap_sg(hace_dev->dev, rctx->src_sg, rctx->src_nents, DMA_TO_DEVICE); - if (rctx->bufcnt != 0) + if (rctx->flags & SHA_FLAGS_FINUP) dma_unmap_single(hace_dev->dev, rctx->buffer_dma_addr, - rctx->block_size * 2, - DMA_TO_DEVICE); + sizeof(rctx->buffer), DMA_TO_DEVICE); dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); - scatterwalk_map_and_copy(rctx->buffer, rctx->src_sg, rctx->offset, - rctx->total - rctx->offset, 0); - - rctx->bufcnt = rctx->total - rctx->offset; rctx->cmd &= ~HASH_CMD_HASH_SRC_SG_CTRL; - if (rctx->flags & SHA_FLAGS_FINUP) - return aspeed_ahash_req_final(hace_dev); - return aspeed_ahash_complete(hace_dev); } @@ -427,9 +385,6 @@ static int aspeed_ahash_update_resume(struct aspeed_hace_dev *hace_dev) dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); - if (rctx->flags & SHA_FLAGS_FINUP) - memcpy(req->result, rctx->digest, rctx->digsize); - return aspeed_ahash_complete(hace_dev); } @@ -468,21 +423,16 @@ static int aspeed_hace_hash_handle_queue(struct aspeed_hace_dev *hace_dev, static int aspeed_ahash_do_request(struct crypto_engine *engine, void *areq) { struct ahash_request *req = ahash_request_cast(areq); - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); struct aspeed_hace_dev *hace_dev = tctx->hace_dev; struct aspeed_engine_hash *hash_engine; - int ret = 0; + int ret; hash_engine = &hace_dev->hash_engine; hash_engine->flags |= CRYPTO_FLAGS_BUSY; - if (rctx->op == SHA_OP_UPDATE) - ret = aspeed_ahash_req_update(hace_dev); - else if (rctx->op == SHA_OP_FINAL) - ret = aspeed_ahash_req_final(hace_dev); - + ret = aspeed_ahash_req_update(hace_dev); if (ret != -EINPROGRESS) return ret; @@ -513,20 +463,6 @@ static int aspeed_ahash_do_one(struct 
crypto_engine *engine, void *areq) return aspeed_ahash_do_request(engine, areq); } -static int aspeed_sham_final(struct ahash_request *req) -{ - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct aspeed_sham_ctx *tctx = crypto_ahash_ctx(tfm); - struct aspeed_hace_dev *hace_dev = tctx->hace_dev; - - AHASH_DBG(hace_dev, "req->nbytes:%d, rctx->total:%d\n", - req->nbytes, rctx->total); - rctx->op = SHA_OP_FINAL; - - return aspeed_hace_hash_handle_queue(hace_dev, req); -} - static int aspeed_sham_update(struct ahash_request *req) { struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); @@ -539,22 +475,7 @@ static int aspeed_sham_update(struct ahash_request *req) rctx->total = req->nbytes; rctx->src_sg = req->src; rctx->offset = 0; - rctx->src_nents = sg_nents(req->src); - rctx->op = SHA_OP_UPDATE; - - rctx->digcnt[0] += rctx->total; - if (rctx->digcnt[0] < rctx->total) - rctx->digcnt[1]++; - - if (rctx->bufcnt + rctx->total < rctx->block_size) { - scatterwalk_map_and_copy(rctx->buffer + rctx->bufcnt, - rctx->src_sg, rctx->offset, - rctx->total, 0); - rctx->bufcnt += rctx->total; - - return rctx->flags & SHA_FLAGS_FINUP ? - aspeed_sham_final(req) : 0; - } + rctx->src_nents = sg_nents_for_len(req->src, req->nbytes); return aspeed_hace_hash_handle_queue(hace_dev, req); } @@ -636,7 +557,6 @@ static int aspeed_sham_init(struct ahash_request *req) return -EINVAL; } - rctx->bufcnt = 0; rctx->total = 0; rctx->digcnt[0] = 0; rctx->digcnt[1] = 0; @@ -661,30 +581,11 @@ static int aspeed_sham_cra_init(struct crypto_ahash *tfm) return 0; } -static int aspeed_sham_export(struct ahash_request *req, void *out) -{ - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - - memcpy(out, rctx, sizeof(*rctx)); - - return 0; -} - -static int aspeed_sham_import(struct ahash_request *req, const void *in) -{ - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - - memcpy(rctx, in, sizeof(*rctx)); - - return 0; -} - static struct aspeed_hace_alg aspeed_ahash_algs[] = { { .alg.ahash.base = { .init = aspeed_sham_init, .update = aspeed_sham_update, - .final = aspeed_sham_final, .finup = aspeed_sham_finup, .digest = aspeed_sham_digest, .export = aspeed_sham_export, @@ -699,6 +600,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), @@ -716,7 +618,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .alg.ahash.base = { .init = aspeed_sham_init, .update = aspeed_sham_update, - .final = aspeed_sham_final, .finup = aspeed_sham_finup, .digest = aspeed_sham_digest, .export = aspeed_sham_export, @@ -731,6 +632,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA256_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), @@ -748,7 +650,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .alg.ahash.base = { .init = aspeed_sham_init, .update = aspeed_sham_update, - .final = aspeed_sham_final, .finup = aspeed_sham_finup, .digest = aspeed_sham_digest, .export = aspeed_sham_export, @@ -763,6 +664,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs[] = { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + 
CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA224_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), @@ -783,7 +685,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .alg.ahash.base = { .init = aspeed_sham_init, .update = aspeed_sham_update, - .final = aspeed_sham_final, .finup = aspeed_sham_finup, .digest = aspeed_sham_digest, .export = aspeed_sham_export, @@ -798,6 +699,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA384_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), @@ -815,7 +717,6 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .alg.ahash.base = { .init = aspeed_sham_init, .update = aspeed_sham_update, - .final = aspeed_sham_final, .finup = aspeed_sham_finup, .digest = aspeed_sham_digest, .export = aspeed_sham_export, @@ -830,6 +731,7 @@ static struct aspeed_hace_alg aspeed_ahash_algs_g6[] = { .cra_priority = 300, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | + CRYPTO_AHASH_ALG_BLOCK_ONLY | CRYPTO_ALG_KERN_DRIVER_ONLY, .cra_blocksize = SHA512_BLOCK_SIZE, .cra_ctxsize = sizeof(struct aspeed_sham_ctx), diff --git a/drivers/crypto/aspeed/aspeed-hace.h b/drivers/crypto/aspeed/aspeed-hace.h index ad39954251dd..b1d07730d543 100644 --- a/drivers/crypto/aspeed/aspeed-hace.h +++ b/drivers/crypto/aspeed/aspeed-hace.h @@ -172,7 +172,6 @@ struct aspeed_sham_reqctx { u64 digcnt[2]; unsigned long flags; /* final update flag should no use*/ - unsigned long op; /* final or update */ u32 cmd; /* trigger cmd */ /* walk state */ @@ -185,14 +184,11 @@ struct aspeed_sham_reqctx { size_t block_size; size_t ivsize; - /* remain data buffer */ dma_addr_t buffer_dma_addr; - size_t bufcnt; /* buffer counter */ - dma_addr_t digest_dma_addr; /* This is DMA too but read-only for hardware. */ - u8 buffer[SHA512_BLOCK_SIZE * 2]; + u8 buffer[SHA512_BLOCK_SIZE + 16]; }; struct aspeed_engine_crypto { From 508712228696eaddc4efc706e6a8dd679654f339 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:04:06 +0800 Subject: [PATCH 010/116] crypto: aspeed/hash - Add fallback If a hash request fails due to a DMA mapping error, or if it is too large to fit in the driver buffer, use a fallback to do the hash rather than failing.
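The control flow of the fallback path added below is roughly (a simplified sketch; error handling and the final request zeroisation are elided):

    HASH_FBREQ_ON_STACK(fbreq, req);         /* on-stack request for the fallback tfm */

    aspeed_sham_export(req, state);          /* dump hardware state in generic format */
    crypto_ahash_import_core(fbreq, state);  /* hand it to the software fallback */

    if (rctx->flags & SHA_FLAGS_FINUP)
            crypto_ahash_finup(fbreq);       /* finish the hash in software */
    else
            crypto_ahash_update(fbreq);

    crypto_ahash_export_core(fbreq, state);  /* pull the state back out ... */
    aspeed_sham_import(req, state);          /* ... so the hardware can resume later */

Because export/import use the generic state layout, the software fallback can pick up exactly where the hardware left off.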
Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 28 +++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index d7c7f867d6e1..3bfb7db96c40 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -420,6 +420,32 @@ static int aspeed_hace_hash_handle_queue(struct aspeed_hace_dev *hace_dev, hace_dev->crypt_engine_hash, req); } +static noinline int aspeed_ahash_fallback(struct ahash_request *req) +{ + struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); + HASH_FBREQ_ON_STACK(fbreq, req); + u8 *state = rctx->buffer; + struct scatterlist sg[2]; + struct scatterlist *ssg; + int ret; + + ssg = scatterwalk_ffwd(sg, req->src, rctx->offset); + ahash_request_set_crypt(fbreq, ssg, req->result, + rctx->total - rctx->offset); + + ret = aspeed_sham_export(req, state) ?: + crypto_ahash_import_core(fbreq, state); + + if (rctx->flags & SHA_FLAGS_FINUP) + ret = ret ?: crypto_ahash_finup(fbreq); + else + ret = ret ?: crypto_ahash_update(fbreq); + crypto_ahash_export_core(fbreq, state) ?: + aspeed_sham_import(req, state); + HASH_REQUEST_ZERO(fbreq); + return ret; +} + static int aspeed_ahash_do_request(struct crypto_engine *engine, void *areq) { struct ahash_request *req = ahash_request_cast(areq); @@ -434,7 +460,7 @@ static int aspeed_ahash_do_request(struct crypto_engine *engine, void *areq) ret = aspeed_ahash_req_update(hace_dev); if (ret != -EINPROGRESS) - return ret; + return aspeed_ahash_fallback(req); return 0; } From 0602f0ef9308fe8a27c2e3325f8281432a5e0a71 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:04:08 +0800 Subject: [PATCH 011/116] crypto: aspeed/hash - Iterate on large hashes in dma_prepare Rather than failing a hash larger than ASPEED_CRYPTO_SRC_DMA_BUF_LEN, just hash them over and over again until it's done. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 36 +++++++++++++++--------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index 3bfb7db96c40..fc2154947ec8 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -155,26 +155,30 @@ static int aspeed_ahash_dma_prepare(struct aspeed_hace_dev *hace_dev) struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; struct ahash_request *req = hash_engine->req; struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - bool final = rctx->flags & SHA_FLAGS_FINUP; unsigned int length, remain; + bool final = false; - length = rctx->total; - remain = final ? 0 : length % rctx->block_size; + length = rctx->total - rctx->offset; + remain = length - round_down(length, rctx->block_size); AHASH_DBG(hace_dev, "length:0x%x, remain:0x%x\n", length, remain); - if ((final ? 
round_up(length, rctx->block_size) + rctx->block_size : - length) > ASPEED_CRYPTO_SRC_DMA_BUF_LEN) { - dev_warn(hace_dev->dev, "Hash data length is too large\n"); - return -EINVAL; - } - + if (length > ASPEED_HASH_SRC_DMA_BUF_LEN) + length = ASPEED_HASH_SRC_DMA_BUF_LEN; + else if (rctx->flags & SHA_FLAGS_FINUP) { + if (round_up(length, rctx->block_size) + rctx->block_size > + ASPEED_CRYPTO_SRC_DMA_BUF_LEN) + length = round_down(length - 1, rctx->block_size); + else + final = true; + } else + length -= remain; scatterwalk_map_and_copy(hash_engine->ahash_src_addr, rctx->src_sg, - rctx->offset, rctx->total - remain, 0); - rctx->offset += rctx->total - remain; + rctx->offset, length, 0); + rctx->offset += length; - rctx->digcnt[0] += rctx->total - remain; - if (rctx->digcnt[0] < rctx->total - remain) + rctx->digcnt[0] += length; + if (rctx->digcnt[0] < length) rctx->digcnt[1]++; if (final) @@ -189,7 +193,7 @@ static int aspeed_ahash_dma_prepare(struct aspeed_hace_dev *hace_dev) return -ENOMEM; } - hash_engine->src_length = length - remain; + hash_engine->src_length = length; hash_engine->src_dma = hash_engine->ahash_src_dma_addr; hash_engine->digest_dma = rctx->digest_dma_addr; @@ -385,6 +389,10 @@ static int aspeed_ahash_update_resume(struct aspeed_hace_dev *hace_dev) dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); + if (rctx->total - rctx->offset >= rctx->block_size || + (rctx->total != rctx->offset && rctx->flags & SHA_FLAGS_FINUP)) + return aspeed_ahash_req_update(hace_dev); + return aspeed_ahash_complete(hace_dev); } From 8c8f269a58f8aa245c39e732a35560b5884c3461 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 May 2025 14:04:11 +0800 Subject: [PATCH 012/116] crypto: aspeed/hash - Fix potential overflow in dma_prepare_sg The mapped SG lists are written to hash_engine->ahash_src_addr which has the size ASPEED_HASH_SRC_DMA_BUF_LEN. Since scatterlists are not bound in size, make sure that size is not exceeded. If the mapped SG list is larger than the buffer, simply iterate over it as is done in the dma_prepare case. Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 84 ++++++++++++------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index fc2154947ec8..e54b7dd03be3 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -146,6 +146,15 @@ static int aspeed_ahash_fill_padding(struct aspeed_hace_dev *hace_dev, return padlen + bitslen; } +static void aspeed_ahash_update_counter(struct aspeed_sham_reqctx *rctx, + unsigned int len) +{ + rctx->offset += len; + rctx->digcnt[0] += len; + if (rctx->digcnt[0] < len) + rctx->digcnt[1]++; +} + /* * Prepare DMA buffer before hardware engine * processing. 
@@ -175,12 +184,7 @@ static int aspeed_ahash_dma_prepare(struct aspeed_hace_dev *hace_dev) length -= remain; scatterwalk_map_and_copy(hash_engine->ahash_src_addr, rctx->src_sg, rctx->offset, length, 0); - rctx->offset += length; - - rctx->digcnt[0] += length; - if (rctx->digcnt[0] < length) - rctx->digcnt[1]++; - + aspeed_ahash_update_counter(rctx, length); if (final) length += aspeed_ahash_fill_padding( hace_dev, rctx, hash_engine->ahash_src_addr + length); @@ -210,13 +214,16 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) struct ahash_request *req = hash_engine->req; struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); bool final = rctx->flags & SHA_FLAGS_FINUP; + int remain, sg_len, i, max_sg_nents; + unsigned int length, offset, total; struct aspeed_sg_list *src_list; struct scatterlist *s; - int length, remain, sg_len, i; int rc = 0; - remain = final ? 0 : rctx->total % rctx->block_size; - length = rctx->total - remain; + offset = rctx->offset; + length = rctx->total - offset; + remain = final ? 0 : length - round_down(length, rctx->block_size); + length -= remain; AHASH_DBG(hace_dev, "%s:0x%x, %s:0x%x, %s:0x%x\n", "rctx total", rctx->total, @@ -230,6 +237,8 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) goto end; } + max_sg_nents = ASPEED_HASH_SRC_DMA_BUF_LEN / sizeof(*src_list) - final; + sg_len = min(sg_len, max_sg_nents); src_list = (struct aspeed_sg_list *)hash_engine->ahash_src_addr; rctx->digest_dma_addr = dma_map_single(hace_dev->dev, rctx->digest, SHA512_DIGEST_SIZE, @@ -240,10 +249,20 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) goto free_src_sg; } + total = 0; for_each_sg(rctx->src_sg, s, sg_len, i) { u32 phy_addr = sg_dma_address(s); u32 len = sg_dma_len(s); + if (len <= offset) { + offset -= len; + continue; + } + + len -= offset; + phy_addr += offset; + offset = 0; + if (length > len) length -= len; else { @@ -252,24 +271,22 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) length = 0; } + total += len; src_list[i].phy_addr = cpu_to_le32(phy_addr); src_list[i].len = cpu_to_le32(len); } if (length != 0) { - rc = -EINVAL; - goto free_rctx_digest; + total = round_down(total, rctx->block_size); + final = false; } - rctx->digcnt[0] += rctx->total - remain; - if (rctx->digcnt[0] < rctx->total - remain) - rctx->digcnt[1]++; - + aspeed_ahash_update_counter(rctx, total); if (final) { int len = aspeed_ahash_fill_padding(hace_dev, rctx, rctx->buffer); - rctx->total += len; + total += len; rctx->buffer_dma_addr = dma_map_single(hace_dev->dev, rctx->buffer, sizeof(rctx->buffer), @@ -286,8 +303,7 @@ static int aspeed_ahash_dma_prepare_sg(struct aspeed_hace_dev *hace_dev) } src_list[i - 1].len |= cpu_to_le32(HASH_SG_LAST_LIST); - rctx->offset = rctx->total - remain; - hash_engine->src_length = rctx->total - remain; + hash_engine->src_length = total; hash_engine->src_dma = hash_engine->ahash_src_dma_addr; hash_engine->digest_dma = rctx->digest_dma_addr; @@ -311,6 +327,13 @@ static int aspeed_ahash_complete(struct aspeed_hace_dev *hace_dev) AHASH_DBG(hace_dev, "\n"); + dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, + SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); + + if (rctx->total - rctx->offset >= rctx->block_size || + (rctx->total != rctx->offset && rctx->flags & SHA_FLAGS_FINUP)) + return aspeed_ahash_req_update(hace_dev); + hash_engine->flags &= ~CRYPTO_FLAGS_BUSY; if (rctx->flags & SHA_FLAGS_FINUP) @@ -366,36 +389,15 @@ static int 
aspeed_ahash_update_resume_sg(struct aspeed_hace_dev *hace_dev) dma_unmap_sg(hace_dev->dev, rctx->src_sg, rctx->src_nents, DMA_TO_DEVICE); - if (rctx->flags & SHA_FLAGS_FINUP) + if (rctx->flags & SHA_FLAGS_FINUP && rctx->total == rctx->offset) dma_unmap_single(hace_dev->dev, rctx->buffer_dma_addr, sizeof(rctx->buffer), DMA_TO_DEVICE); - dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, - SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); - rctx->cmd &= ~HASH_CMD_HASH_SRC_SG_CTRL; return aspeed_ahash_complete(hace_dev); } -static int aspeed_ahash_update_resume(struct aspeed_hace_dev *hace_dev) -{ - struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; - struct ahash_request *req = hash_engine->req; - struct aspeed_sham_reqctx *rctx = ahash_request_ctx(req); - - AHASH_DBG(hace_dev, "\n"); - - dma_unmap_single(hace_dev->dev, rctx->digest_dma_addr, - SHA512_DIGEST_SIZE, DMA_BIDIRECTIONAL); - - if (rctx->total - rctx->offset >= rctx->block_size || - (rctx->total != rctx->offset && rctx->flags & SHA_FLAGS_FINUP)) - return aspeed_ahash_req_update(hace_dev); - - return aspeed_ahash_complete(hace_dev); -} - static int aspeed_ahash_req_update(struct aspeed_hace_dev *hace_dev) { struct aspeed_engine_hash *hash_engine = &hace_dev->hash_engine; @@ -411,7 +413,7 @@ static int aspeed_ahash_req_update(struct aspeed_hace_dev *hace_dev) resume = aspeed_ahash_update_resume_sg; } else { - resume = aspeed_ahash_update_resume; + resume = aspeed_ahash_complete; } ret = hash_engine->dma_prepare(hace_dev); From b6cd3cfb5afe49952f8f6be947aeeca9ba0faebb Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Mon, 19 May 2025 18:13:48 +0300 Subject: [PATCH 013/116] crypto: sun8i-ce - fix nents passed to dma_unmap_sg() In sun8i_ce_cipher_unprepare(), dma_unmap_sg() is incorrectly called with the number of entries returned by dma_map_sg(), rather than using the original number of entries passed when mapping the scatterlist. To fix this, stash the original number of entries passed to dma_map_sg() in the request context. Fixes: 0605fa0f7826 ("crypto: sun8i-ce - split into prepare/run/unprepare") Signed-off-by: Ovidiu Panait Acked-by: Corentin LABBE Tested-by: Corentin LABBE Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index f9cf00d690e2..7cd3b13f3bdc 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -278,8 +278,8 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req } chan->timeout = areq->cryptlen; - rctx->nr_sgs = nr_sgs; - rctx->nr_sgd = nr_sgd; + rctx->nr_sgs = ns; + rctx->nr_sgd = nd; return 0; theend_sgs: From 67a4ad04e3d530f50497205ada07c78a89f90de7 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Mon, 19 May 2025 18:13:49 +0300 Subject: [PATCH 014/116] crypto: sun8i-ce - remove ivlen field of sun8i_cipher_req_ctx Remove `ivlen` field of `sun8i_cipher_req_ctx`, as it is not really useful. The iv length returned by crypto_skcipher_ivsize() is already available everywhere and can be used instead. 
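For illustration only (condensed from the hunks below, not an extra
change), the IV handling now queries the size at each use instead of
caching it in the request context:

  unsigned int ivsize = crypto_skcipher_ivsize(tfm);

  /* map the bounce IV with the size queried from the tfm ... */
  rctx->addr_iv = dma_map_single(ce->dev, chan->bounce_iv, ivsize,
                                 DMA_TO_DEVICE);
  ...
  /* ... and unmap with the same queried size; no cached ivlen needed */
  dma_unmap_single(ce->dev, rctx->addr_iv, ivsize, DMA_TO_DEVICE);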
Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c | 11 ++++++----- drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h | 2 -- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c index 7cd3b13f3bdc..5663df49dd81 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-cipher.c @@ -206,15 +206,14 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req cet->t_key = desc_addr_val_le32(ce, rctx->addr_key); ivsize = crypto_skcipher_ivsize(tfm); - if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { - rctx->ivlen = ivsize; + if (areq->iv && ivsize > 0) { if (rctx->op_dir & CE_DECRYPTION) { offset = areq->cryptlen - ivsize; scatterwalk_map_and_copy(chan->backup_iv, areq->src, offset, ivsize, 0); } memcpy(chan->bounce_iv, areq->iv, ivsize); - rctx->addr_iv = dma_map_single(ce->dev, chan->bounce_iv, rctx->ivlen, + rctx->addr_iv = dma_map_single(ce->dev, chan->bounce_iv, ivsize, DMA_TO_DEVICE); if (dma_mapping_error(ce->dev, rctx->addr_iv)) { dev_err(ce->dev, "Cannot DMA MAP IV\n"); @@ -296,7 +295,8 @@ static int sun8i_ce_cipher_prepare(struct crypto_engine *engine, void *async_req theend_iv: if (areq->iv && ivsize > 0) { if (!dma_mapping_error(ce->dev, rctx->addr_iv)) - dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE); + dma_unmap_single(ce->dev, rctx->addr_iv, ivsize, + DMA_TO_DEVICE); offset = areq->cryptlen - ivsize; if (rctx->op_dir & CE_DECRYPTION) { @@ -345,7 +345,8 @@ static void sun8i_ce_cipher_unprepare(struct crypto_engine *engine, if (areq->iv && ivsize > 0) { if (cet->t_iv) - dma_unmap_single(ce->dev, rctx->addr_iv, rctx->ivlen, DMA_TO_DEVICE); + dma_unmap_single(ce->dev, rctx->addr_iv, ivsize, + DMA_TO_DEVICE); offset = areq->cryptlen - ivsize; if (rctx->op_dir & CE_DECRYPTION) { memcpy(areq->iv, chan->backup_iv, ivsize); diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h index 83df4d719053..0f9a89067016 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce.h @@ -260,7 +260,6 @@ static inline __le32 desc_addr_val_le32(struct sun8i_ce_dev *dev, * struct sun8i_cipher_req_ctx - context for a skcipher request * @op_dir: direction (encrypt vs decrypt) for this request * @flow: the flow to use for this request - * @ivlen: size of bounce_iv * @nr_sgs: The number of source SG (as given by dma_map_sg()) * @nr_sgd: The number of destination SG (as given by dma_map_sg()) * @addr_iv: The IV addr returned by dma_map_single, need to unmap later @@ -270,7 +269,6 @@ static inline __le32 desc_addr_val_le32(struct sun8i_ce_dev *dev, struct sun8i_cipher_req_ctx { u32 op_dir; int flow; - unsigned int ivlen; int nr_sgs; int nr_sgd; dma_addr_t addr_iv; From 003bb3745920cfa754d69a6c32d2c06b561af75b Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Mon, 19 May 2025 18:13:50 +0300 Subject: [PATCH 015/116] crypto: sun8i-ce - use helpers to get hash block and digest sizes Use crypto_ahash_blocksize() and crypto_ahash_digestsize() helpers instead of directly accessing 'struct ahash_alg' members. 
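A minimal sketch of the resulting pattern (tfm is the request's
transform, exactly as in the hunks below):

  struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
  unsigned int bs = crypto_ahash_blocksize(tfm);         /* was cra_blocksize   */
  unsigned int ds = crypto_ahash_digestsize(tfm);        /* was halg.digestsize */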
Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu --- drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index bef44f350167..13bdfb8a2c62 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -342,8 +342,8 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) algt = container_of(alg, struct sun8i_ce_alg_template, alg.hash.base); ce = algt->ce; - bs = algt->alg.hash.base.halg.base.cra_blocksize; - digestsize = algt->alg.hash.base.halg.digestsize; + bs = crypto_ahash_blocksize(tfm); + digestsize = crypto_ahash_digestsize(tfm); if (digestsize == SHA224_DIGEST_SIZE) digestsize = SHA256_DIGEST_SIZE; if (digestsize == SHA384_DIGEST_SIZE) @@ -455,7 +455,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) err_unmap_result: dma_unmap_single(ce->dev, addr_res, digestsize, DMA_FROM_DEVICE); if (!err) - memcpy(areq->result, result, algt->alg.hash.base.halg.digestsize); + memcpy(areq->result, result, crypto_ahash_digestsize(tfm)); err_unmap_src: dma_unmap_sg(ce->dev, areq->src, ns, DMA_TO_DEVICE); From aaeff14688d0254b39731d9bb303c79bfd610f7d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 May 2025 10:21:01 -0500 Subject: [PATCH 016/116] crypto: ccp - Add missing bootloader info reg for pspv6 The bootloader info reg for pspv6 is the same as pspv4 and pspv5. Suggested-by: Tom Lendacky Signed-off-by: Mario Limonciello Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sp-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index e1be2072d680..e7bb803912a6 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -453,6 +453,7 @@ static const struct psp_vdata pspv6 = { .cmdresp_reg = 0x10944, /* C2PMSG_17 */ .cmdbuff_addr_lo_reg = 0x10948, /* C2PMSG_18 */ .cmdbuff_addr_hi_reg = 0x1094c, /* C2PMSG_19 */ + .bootloader_info_reg = 0x109ec, /* C2PMSG_59 */ .feature_reg = 0x109fc, /* C2PMSG_63 */ .inten_reg = 0x10510, /* P2CMSG_INTEN */ .intsts_reg = 0x10514, /* P2CMSG_INTSTS */ From 9d50a25eeb05c45fef46120f4527885a14c84fb2 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 21 May 2025 14:55:19 +0200 Subject: [PATCH 017/116] crypto: testmgr - desupport SHA-1 for FIPS 140 The sunset period of SHA-1 is approaching [1] and FIPS 140 certificates have a validity of 5 years. Any distros starting FIPS certification for their kernels now would therefore most likely end up on the NIST Cryptographic Module Validation Program "historical" list before their certification expires. While SHA-1 is technically still allowed until Dec. 31, 2030, it is heavily discouraged by NIST and it makes sense to set .fips_allowed to 0 now for any crypto algorithms that reference it in order to avoid any costly surprises down the line. 
[1]: https://www.nist.gov/news-events/news/2022/12/nist-retires-sha-1-cryptographic-algorithm Acked-by: Stephan Mueller Cc: Marcus Meissner Cc: Jarod Wilson Cc: Neil Horman Cc: John Haxby Signed-off-by: Vegard Nossum Signed-off-by: Herbert Xu --- crypto/testmgr.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 72005074a5c2..a4ad939e03c9 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4229,7 +4229,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "authenc(hmac(sha1),cbc(aes))", .test = alg_test_aead, - .fips_allowed = 1, .suite = { .aead = __VECS(hmac_sha1_aes_cbc_tv_temp) } @@ -4248,7 +4247,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "authenc(hmac(sha1),ctr(aes))", .test = alg_test_null, - .fips_allowed = 1, }, { .alg = "authenc(hmac(sha1),ecb(cipher_null))", .test = alg_test_aead, @@ -4258,7 +4256,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "authenc(hmac(sha1),rfc3686(ctr(aes)))", .test = alg_test_null, - .fips_allowed = 1, }, { .alg = "authenc(hmac(sha224),cbc(des))", .test = alg_test_aead, @@ -5100,7 +5097,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "hmac(sha1)", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = __VECS(hmac_sha1_tv_template) } @@ -5436,7 +5432,6 @@ static const struct alg_test_desc alg_test_descs[] = { }, { .alg = "sha1", .test = alg_test_hash, - .fips_allowed = 1, .suite = { .hash = __VECS(sha1_tv_template) } From 4cc871ad0173e8bc22f80e3609e34d546d30ef1a Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 22 May 2025 09:21:41 +0100 Subject: [PATCH 018/116] crypto: qat - use unmanaged allocation for dc_data The dc_data structure holds data required for handling compression operations, such as overflow buffers. In this context, the use of managed memory allocation APIs (devm_kzalloc() and devm_kfree()) is not necessary, as these data structures are freed and re-allocated when a device is restarted in adf_dev_down() and adf_dev_up(). Additionally, managed APIs automatically handle memory cleanup when the device is detached, which can lead to conflicts with manual cleanup processes. Specifically, if a device driver invokes the adf_dev_down() function as part of the cleanup registered with devm_add_action_or_reset(), it may attempt to free memory that is also managed by the device's resource management system, potentially leading to a double-free. This might result in a warning similar to the following when unloading the device specific driver, for example qat_6xxx.ko: qat_free_dc_data+0x4f/0x60 [intel_qat] qat_compression_event_handler+0x3d/0x1d0 [intel_qat] adf_dev_shutdown+0x6d/0x1a0 [intel_qat] adf_dev_down+0x32/0x50 [intel_qat] devres_release_all+0xb8/0x110 device_unbind_cleanup+0xe/0x70 device_release_driver_internal+0x1c1/0x200 driver_detach+0x48/0x90 bus_remove_driver+0x74/0xf0 pci_unregister_driver+0x2e/0xb0 Use unmanaged memory allocation APIs (kzalloc_node() and kfree()) for the dc_data structure. This ensures that memory is explicitly allocated and freed under the control of the driver code, preventing manual deallocation from interfering with automatic cleanup. 
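In sketch form, the dc_data lifetime is now fully owned by the driver
(allocation and free taken from the hunk below):

  /* allocated on adf_dev_up(), NUMA-local to the accelerator */
  dc_data = kzalloc_node(sizeof(*dc_data), GFP_KERNEL, dev_to_node(dev));
  ...
  /* freed explicitly on adf_dev_down(), never by devres_release_all() */
  kfree(dc_data);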
Fixes: 1198ae56c9a5 ("crypto: qat - expose deflate through acomp api for QAT GEN2")
Signed-off-by: Suman Kumar Chakraborty
Reviewed-by: Giovanni Cabiddu
Signed-off-by: Herbert Xu
---
 drivers/crypto/intel/qat/qat_common/qat_compression.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/intel/qat/qat_common/qat_compression.c b/drivers/crypto/intel/qat/qat_common/qat_compression.c
index c285b45b8679..0a77ca65c8d4 100644
--- a/drivers/crypto/intel/qat/qat_common/qat_compression.c
+++ b/drivers/crypto/intel/qat/qat_common/qat_compression.c
@@ -196,7 +196,7 @@ static int qat_compression_alloc_dc_data(struct adf_accel_dev *accel_dev)
 	struct adf_dc_data *dc_data = NULL;
 	u8 *obuff = NULL;
 
-	dc_data = devm_kzalloc(dev, sizeof(*dc_data), GFP_KERNEL);
+	dc_data = kzalloc_node(sizeof(*dc_data), GFP_KERNEL, dev_to_node(dev));
 	if (!dc_data)
 		goto err;
 
@@ -234,7 +234,7 @@ static void qat_free_dc_data(struct adf_accel_dev *accel_dev)
 	dma_unmap_single(dev, dc_data->ovf_buff_p, dc_data->ovf_buff_sz,
 			 DMA_FROM_DEVICE);
 	kfree_sensitive(dc_data->ovf_buff);
-	devm_kfree(dev, dc_data);
+	kfree(dc_data);
 	accel_dev->dc_data = NULL;
 }

From b7b88b4939e71ef2aed8238976a2bbabcb63a790 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Thu, 22 May 2025 17:32:47 +0800
Subject: [PATCH 019/116] crypto: marvell/cesa - Remove unnecessary state setting on final

There is no point in setting the hash state after finalisation since
the hash state must never be used again. Remove that code.

Signed-off-by: Herbert Xu
---
 drivers/crypto/marvell/cesa/hash.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c
index 6815eddc9068..8b4a3f291926 100644
--- a/drivers/crypto/marvell/cesa/hash.c
+++ b/drivers/crypto/marvell/cesa/hash.c
@@ -362,16 +362,13 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req)
 	if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ &&
 	    (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) ==
 	     CESA_TDMA_RESULT) {
-		__le32 *data = NULL;
+		const void *data;
 
 		/*
 		 * Result is already in the correct endianness when the SA is
 		 * used
 		 */
 		data = creq->base.chain.last->op->ctx.hash.hash;
-		for (i = 0; i < digsize / 4; i++)
-			creq->state[i] = le32_to_cpu(data[i]);
-
 		memcpy(ahashreq->result, data, digsize);
 	} else {
 		for (i = 0; i < digsize / 4; i++)

From 2157e50f65d2030f07ea27ef7ac4cfba772e98ac Mon Sep 17 00:00:00 2001
From: Bharat Bhushan
Date: Thu, 22 May 2025 15:36:24 +0530
Subject: [PATCH 020/116] crypto: octeontx2 - add timeout for load_fvc completion poll

Add a timeout to exit from a possible infinite loop that polls on
CPT instruction (load_fvc) completion.
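In outline, the bounded poll looks like this (constants as in the hunk
below; the error path is simplified here):

  int timeout = 10000;

  while (lfs->ops->cpt_get_compcode(result) ==
         OTX2_CPT_COMPLETION_CODE_INIT) {
          cpu_relax();
          udelay(1);
          if (!--timeout)
                  return -ENODEV;  /* engine never completed the command */
  }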
Signed-off-by: Srujana Challa Signed-off-by: Bharat Bhushan Signed-off-by: Herbert Xu --- .../crypto/marvell/octeontx2/otx2_cptpf_ucode.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 78367849c3d5..9095dea2748d 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1494,6 +1494,7 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) dma_addr_t rptr_baddr; struct pci_dev *pdev; u32 len, compl_rlen; + int timeout = 10000; int ret, etype; void *rptr; @@ -1554,16 +1555,27 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) etype); otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]); + timeout = 10000; while (lfs->ops->cpt_get_compcode(result) == - OTX2_CPT_COMPLETION_CODE_INIT) + OTX2_CPT_COMPLETION_CODE_INIT) { cpu_relax(); + udelay(1); + timeout--; + if (!timeout) { + ret = -ENODEV; + cptpf->is_eng_caps_discovered = false; + dev_warn(&pdev->dev, "Timeout on CPT load_fvc completion poll\n"); + goto error_no_response; + } + } cptpf->eng_caps[etype].u = be64_to_cpup(rptr); } - dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); cptpf->is_eng_caps_discovered = true; +error_no_response: + dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); free_result: kfree(result); lf_cleanup: From b7b88b4939e71ef2aed8238976a2bbabcb63a790 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 22 May 2025 15:36:25 +0530 Subject: [PATCH 021/116] crypto: octeontx2 - Fix address alignment issue on ucode loading octeontx2 crypto driver allocates memory using kmalloc/kzalloc, and uses this memory for dma (does dma_map_single()). It assumes that kmalloc/kzalloc will return 128-byte aligned address. But kmalloc/kzalloc returns 8-byte aligned address after below changes: "9382bc44b5f5 arm64: allow kmalloc() caches aligned to the smaller cache_line_size()" Completion address should be 32-Byte alignment when loading microcode. 
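The underlying idiom is over-allocation plus pointer alignment. A
generic sketch, where payload_len is illustrative and not a symbol from
the driver:

  void *base, *rptr;
  size_t len;

  /* worst case: up to ARCH_DMA_MINALIGN - 1 bytes of leading padding */
  len = payload_len + ARCH_DMA_MINALIGN;
  base = kzalloc(len, GFP_KERNEL);
  if (!base)
          return -ENOMEM;
  rptr = PTR_ALIGN(base, ARCH_DMA_MINALIGN);  /* aligned view for DMA */
  ...
  kfree(base);  /* always free the original pointer, not the aligned one */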
Signed-off-by: Bharat Bhushan Cc: # v6.5+ Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cptpf_ucode.c | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 9095dea2748d..56645b3eb717 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1491,12 +1491,13 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) union otx2_cpt_opcode opcode; union otx2_cpt_res_s *result; union otx2_cpt_inst_s inst; + dma_addr_t result_baddr; dma_addr_t rptr_baddr; struct pci_dev *pdev; - u32 len, compl_rlen; int timeout = 10000; + void *base, *rptr; int ret, etype; - void *rptr; + u32 len; /* * We don't get capabilities if it was already done @@ -1519,22 +1520,28 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) if (ret) goto delete_grps; - compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN); - len = compl_rlen + LOADFVC_RLEN; + /* Allocate extra memory for "rptr" and "result" pointer alignment */ + len = LOADFVC_RLEN + ARCH_DMA_MINALIGN + + sizeof(union otx2_cpt_res_s) + OTX2_CPT_RES_ADDR_ALIGN; - result = kzalloc(len, GFP_KERNEL); - if (!result) { + base = kzalloc(len, GFP_KERNEL); + if (!base) { ret = -ENOMEM; goto lf_cleanup; } - rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len, - DMA_BIDIRECTIONAL); + + rptr = PTR_ALIGN(base, ARCH_DMA_MINALIGN); + rptr_baddr = dma_map_single(&pdev->dev, rptr, len, DMA_BIDIRECTIONAL); if (dma_mapping_error(&pdev->dev, rptr_baddr)) { dev_err(&pdev->dev, "DMA mapping failed\n"); ret = -EFAULT; - goto free_result; + goto free_rptr; } - rptr = (u8 *)result + compl_rlen; + + result = (union otx2_cpt_res_s *)PTR_ALIGN(rptr + LOADFVC_RLEN, + OTX2_CPT_RES_ADDR_ALIGN); + result_baddr = ALIGN(rptr_baddr + LOADFVC_RLEN, + OTX2_CPT_RES_ADDR_ALIGN); /* Fill in the command */ opcode.s.major = LOADFVC_MAJOR_OP; @@ -1546,14 +1553,14 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) /* 64-bit swap for microcode data reads, not needed for addresses */ cpu_to_be64s(&iq_cmd.cmd.u); iq_cmd.dptr = 0; - iq_cmd.rptr = rptr_baddr + compl_rlen; + iq_cmd.rptr = rptr_baddr; iq_cmd.cptr.u = 0; for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) { result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps, etype); - otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); + otx2_cpt_fill_inst(&inst, &iq_cmd, result_baddr); lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]); timeout = 10000; @@ -1576,8 +1583,8 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) error_no_response: dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); -free_result: - kfree(result); +free_rptr: + kfree(base); lf_cleanup: otx2_cptlf_shutdown(lfs); delete_grps: From 2e13163b43e6bb861182ea999a80dd1d893c0cbf Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 22 May 2025 15:36:26 +0530 Subject: [PATCH 022/116] crypto: octeontx2 - Fix address alignment on CN10K A0/A1 and OcteonTX2 octeontx2 crypto driver allocates memory using kmalloc/kzalloc, and uses this memory for dma (does dma_map_single()). It assumes that kmalloc/kzalloc will return 128-byte aligned address. 
But kmalloc/kzalloc returns 8-byte aligned address after below changes: "9382bc44b5f5 arm64: allow kmalloc() caches aligned to the smaller cache_line_size() Memory allocated are used for following purpose: - Input data or scatter list address - 8-Byte alignment - Output data or gather list address - 8-Byte alignment - Completion address - 32-Byte alignment. This patch ensures all addresses are aligned as mentioned above. Signed-off-by: Bharat Bhushan Cc: # v6.5+ Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cpt_reqmgr.h | 66 ++++++++++++++----- 1 file changed, 51 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index e27e849b01df..98de93851ba1 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -34,6 +34,9 @@ #define SG_COMP_2 2 #define SG_COMP_1 1 +#define OTX2_CPT_DPTR_RPTR_ALIGN 8 +#define OTX2_CPT_RES_ADDR_ALIGN 32 + union otx2_cpt_opcode { u16 flags; struct { @@ -417,10 +420,9 @@ static inline struct otx2_cpt_inst_info * otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, gfp_t gfp) { - int align = OTX2_CPT_DMA_MINALIGN; struct otx2_cpt_inst_info *info; - u32 dlen, align_dlen, info_len; - u16 g_sz_bytes, s_sz_bytes; + u32 dlen, info_len; + u16 g_len, s_len; u32 total_mem_len; if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || @@ -429,22 +431,54 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, return NULL; } - g_sz_bytes = ((req->in_cnt + 3) / 4) * - sizeof(struct otx2_cpt_sglist_component); - s_sz_bytes = ((req->out_cnt + 3) / 4) * - sizeof(struct otx2_cpt_sglist_component); + /* Allocate memory to meet below alignment requirement: + * ------------------------------------ + * | struct otx2_cpt_inst_info | + * | (No alignment required) | + * | --------------------------------| + * | | padding for ARCH_DMA_MINALIGN | + * | | alignment | + * |------------------------------------| + * | SG List Header of 8 Byte | + * |------------------------------------| + * | SG List Gather/Input memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * |---------------------------------- | + * | SG List Scatter/Output memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * | -------------------------------| + * | | padding for 32B alignment | + * |------------------------------------| + * | Result response memory | + * | Alignment = 32Byte | + * ------------------------------------ + */ - dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; - align_dlen = ALIGN(dlen, align); - info_len = ALIGN(sizeof(*info), align); - total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); + info_len = sizeof(*info); + + g_len = ((req->in_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + s_len = ((req->out_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + + dlen = g_len + s_len + SG_LIST_HDR_SIZE; + + /* Allocate extra memory for SG and response address alignment */ + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN); + total_mem_len += (ARCH_DMA_MINALIGN - 1) & + ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1); + total_mem_len += ALIGN(dlen, OTX2_CPT_RES_ADDR_ALIGN); + total_mem_len += sizeof(union otx2_cpt_res_s); info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) return NULL; info->dlen = dlen; - info->in_buffer = (u8 *)info + info_len; + info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN); + 
info->out_buffer = info->in_buffer + SG_LIST_HDR_SIZE + g_len; ((u16 *)info->in_buffer)[0] = req->out_cnt; ((u16 *)info->in_buffer)[1] = req->in_cnt; @@ -460,7 +494,7 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, } if (setup_sgio_components(pdev, req->out, req->out_cnt, - &info->in_buffer[8 + g_sz_bytes])) { + info->out_buffer)) { dev_err(&pdev->dev, "Failed to setup scatter list\n"); goto destroy_info; } @@ -476,8 +510,10 @@ otx2_sg_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, * Get buffer for union otx2_cpt_res_s response * structure and its physical address */ - info->completion_addr = info->in_buffer + align_dlen; - info->comp_baddr = info->dptr_baddr + align_dlen; + info->completion_addr = PTR_ALIGN((info->in_buffer + dlen), + OTX2_CPT_RES_ADDR_ALIGN); + info->comp_baddr = ALIGN((info->dptr_baddr + dlen), + OTX2_CPT_RES_ADDR_ALIGN); return info; From a091a58b8a1eba2f243b0c05bcc82bdc2a4a338d Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 22 May 2025 15:36:27 +0530 Subject: [PATCH 023/116] crypto: octeontx2 - Fix address alignment on CN10KB and CN10KA-B0 octeontx2 crypto driver allocates memory using kmalloc/kzalloc, and uses this memory for dma (does dma_map_single()). It assumes that kmalloc/kzalloc will return 128-byte aligned address. But kmalloc/kzalloc returns 8-byte aligned address after below changes: "9382bc44b5f5 arm64: allow kmalloc() caches aligned to the smaller cache_line_size() Memory allocated are used for following purpose: - Input data or scatter list address - 8-Byte alignment - Output data or gather list address - 8-Byte alignment - Completion address - 32-Byte alignment. This patch ensures all addresses are aligned as mentioned above. Signed-off-by: Bharat Bhushan Cc: # v6.8+ Signed-off-by: Herbert Xu --- .../marvell/octeontx2/otx2_cpt_reqmgr.h | 59 ++++++++++++++----- 1 file changed, 44 insertions(+), 15 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index 98de93851ba1..90a031421aac 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -350,22 +350,48 @@ static inline struct otx2_cpt_inst_info * cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, gfp_t gfp) { - u32 dlen = 0, g_len, sg_len, info_len; - int align = OTX2_CPT_DMA_MINALIGN; + u32 dlen = 0, g_len, s_len, sg_len, info_len; struct otx2_cpt_inst_info *info; - u16 g_sz_bytes, s_sz_bytes; u32 total_mem_len; int i; - g_sz_bytes = ((req->in_cnt + 2) / 3) * - sizeof(struct cn10kb_cpt_sglist_component); - s_sz_bytes = ((req->out_cnt + 2) / 3) * - sizeof(struct cn10kb_cpt_sglist_component); + /* Allocate memory to meet below alignment requirement: + * ------------------------------------ + * | struct otx2_cpt_inst_info | + * | (No alignment required) | + * | --------------------------------| + * | | padding for ARCH_DMA_MINALIGN | + * | | alignment | + * |------------------------------------| + * | SG List Gather/Input memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * |---------------------------------- | + * | SG List Scatter/Output memory | + * | Length = multiple of 32Bytes | + * | Alignment = 8Byte | + * | -------------------------------| + * | | padding for 32B alignment | + * |------------------------------------| + * | Result response memory | + * | Alignment = 32Byte | + * ------------------------------------ + */ - g_len = ALIGN(g_sz_bytes, 
align); - sg_len = ALIGN(g_len + s_sz_bytes, align); - info_len = ALIGN(sizeof(*info), align); - total_mem_len = sg_len + info_len + sizeof(union otx2_cpt_res_s); + info_len = sizeof(*info); + + g_len = ((req->in_cnt + 2) / 3) * + sizeof(struct cn10kb_cpt_sglist_component); + s_len = ((req->out_cnt + 2) / 3) * + sizeof(struct cn10kb_cpt_sglist_component); + sg_len = g_len + s_len; + + /* Allocate extra memory for SG and response address alignment */ + total_mem_len = ALIGN(info_len, OTX2_CPT_DPTR_RPTR_ALIGN); + total_mem_len += (ARCH_DMA_MINALIGN - 1) & + ~(OTX2_CPT_DPTR_RPTR_ALIGN - 1); + total_mem_len += ALIGN(sg_len, OTX2_CPT_RES_ADDR_ALIGN); + total_mem_len += sizeof(union otx2_cpt_res_s); info = kzalloc(total_mem_len, gfp); if (unlikely(!info)) @@ -375,7 +401,8 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, dlen += req->in[i].size; info->dlen = dlen; - info->in_buffer = (u8 *)info + info_len; + info->in_buffer = PTR_ALIGN((u8 *)info + info_len, ARCH_DMA_MINALIGN); + info->out_buffer = info->in_buffer + g_len; info->gthr_sz = req->in_cnt; info->sctr_sz = req->out_cnt; @@ -387,7 +414,7 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, } if (sgv2io_components_setup(pdev, req->out, req->out_cnt, - &info->in_buffer[g_len])) { + info->out_buffer)) { dev_err(&pdev->dev, "Failed to setup scatter list\n"); goto destroy_info; } @@ -404,8 +431,10 @@ cn10k_sgv2_info_create(struct pci_dev *pdev, struct otx2_cpt_req_info *req, * Get buffer for union otx2_cpt_res_s response * structure and its physical address */ - info->completion_addr = info->in_buffer + sg_len; - info->comp_baddr = info->dptr_baddr + sg_len; + info->completion_addr = PTR_ALIGN((info->in_buffer + sg_len), + OTX2_CPT_RES_ADDR_ALIGN); + info->comp_baddr = ALIGN((info->dptr_baddr + sg_len), + OTX2_CPT_RES_ADDR_ALIGN); return info; From 442134ab30e75b7229c4bfc1ac5641d245cffe27 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 May 2025 20:41:28 +0800 Subject: [PATCH 024/116] crypto: marvell/cesa - Fix engine load inaccuracy If an error occurs during queueing the engine load will never be decremented. Fix this by moving the engine load adjustment into the cleanup function. 
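The invariant after this change, in sketch form (the atomic_add on the
submission side already exists in this driver and is shown here only
for context; weight is the load accounted when an engine is picked):

  /* submission: account for the request when an engine is selected */
  atomic_add(weight, &engine->load);
  ...
  /* cleanup: runs for completed *and* failed-to-queue requests */
  atomic_sub(req->cryptlen, &engine->load);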
Fixes: bf8f91e71192 ("crypto: marvell - Add load balancing between engines") Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa/cipher.c | 4 +++- drivers/crypto/marvell/cesa/hash.c | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c index 48c5c8ea8c43..3fe0fd9226cf 100644 --- a/drivers/crypto/marvell/cesa/cipher.c +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -75,9 +75,12 @@ mv_cesa_skcipher_dma_cleanup(struct skcipher_request *req) static inline void mv_cesa_skcipher_cleanup(struct skcipher_request *req) { struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_engine *engine = creq->base.engine; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_skcipher_dma_cleanup(req); + + atomic_sub(req->cryptlen, &engine->load); } static void mv_cesa_skcipher_std_step(struct skcipher_request *req) @@ -212,7 +215,6 @@ mv_cesa_skcipher_complete(struct crypto_async_request *req) struct mv_cesa_engine *engine = creq->base.engine; unsigned int ivsize; - atomic_sub(skreq->cryptlen, &engine->load); ivsize = crypto_skcipher_ivsize(crypto_skcipher_reqtfm(skreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c index 8b4a3f291926..5103d36cdfdb 100644 --- a/drivers/crypto/marvell/cesa/hash.c +++ b/drivers/crypto/marvell/cesa/hash.c @@ -110,9 +110,12 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine = creq->base.engine; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_cleanup(req); + + atomic_sub(req->nbytes, &engine->load); } static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) @@ -392,8 +395,6 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) } } } - - atomic_sub(ahashreq->nbytes, &engine->load); } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, From 1b39bc4a703a63a22c08232015540adfb31f22ba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 23 May 2025 19:24:34 +0800 Subject: [PATCH 025/116] crypto: s390/hmac - Fix counter in export state The hmac export state needs to be one block-size bigger to account for the ipad. 
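Concretely, the exported count now includes the ipad block, with an
explicit carry/borrow on the 128-bit counter. The arithmetic in sketch
form (bs is the block size):

  /* export: account for the ipad block already absorbed by KMAC */
  u64 lo = ctx->buflen[0] + bs;
  u64 hi = ctx->buflen[1] + (lo < bs);   /* carry on wrap-around */

  /* import: strip that block again */
  ctx->buflen[0] = lo - bs;
  ctx->buflen[1] = hi - (lo < bs);       /* borrow on wrap-around */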
Reported-by: Ingo Franzki Fixes: 08811169ac01 ("crypto: s390/hmac - Use API partial block handling") Signed-off-by: Herbert Xu --- arch/s390/crypto/hmac_s390.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/s390/crypto/hmac_s390.c b/arch/s390/crypto/hmac_s390.c index 93a1098d9f8d..58444da9b004 100644 --- a/arch/s390/crypto/hmac_s390.c +++ b/arch/s390/crypto/hmac_s390.c @@ -290,6 +290,7 @@ static int s390_hmac_export(struct shash_desc *desc, void *out) struct s390_kmac_sha2_ctx *ctx = shash_desc_ctx(desc); unsigned int bs = crypto_shash_blocksize(desc->tfm); unsigned int ds = bs / 2; + u64 lo = ctx->buflen[0]; union { u8 *u8; u64 *u64; @@ -301,9 +302,10 @@ static int s390_hmac_export(struct shash_desc *desc, void *out) else memcpy(p.u8, ctx->param, ds); p.u8 += ds; - put_unaligned(ctx->buflen[0], p.u64++); + lo += bs; + put_unaligned(lo, p.u64++); if (ds == SHA512_DIGEST_SIZE) - put_unaligned(ctx->buflen[1], p.u64); + put_unaligned(ctx->buflen[1] + (lo < bs), p.u64); return err; } @@ -316,14 +318,16 @@ static int s390_hmac_import(struct shash_desc *desc, const void *in) const u8 *u8; const u64 *u64; } p = { .u8 = in }; + u64 lo; int err; err = s390_hmac_sha2_init(desc); memcpy(ctx->param, p.u8, ds); p.u8 += ds; - ctx->buflen[0] = get_unaligned(p.u64++); + lo = get_unaligned(p.u64++); + ctx->buflen[0] = lo - bs; if (ds == SHA512_DIGEST_SIZE) - ctx->buflen[1] = get_unaligned(p.u64); + ctx->buflen[1] = get_unaligned(p.u64) - (lo < bs); if (ctx->buflen[0] | ctx->buflen[1]) ctx->gr0.ikp = 1; return err; From 73c2437109c3eab274258a6430ae5dafac1ef43e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 23 May 2025 20:28:56 +0800 Subject: [PATCH 026/116] crypto: s390/sha3 - Use cpu byte-order when exporting The sha3 partial hash on s390 is in little-endian just like the final hash. However the generic implementation produces native or big-endian partial hashes. Make s390 sha3 conform to that by doing the byte-swap on export and import. 
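The conversion is a word-wise swap between the hardware's little-endian
state and the CPU byte-order format used by the generic export. The
core of both directions, as in the hunks below:

  /* export: little-endian hardware state -> CPU byte-order */
  for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
          put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++);

  /* import: CPU byte-order -> little-endian hardware state */
  for (i = 0; i < SHA3_STATE_SIZE / 8; i++)
          sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++));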
Reported-by: Ingo Franzki Fixes: 6f90ba706551 ("crypto: s390/sha3 - Use API partial block handling") Signed-off-by: Herbert Xu --- arch/s390/crypto/sha.h | 3 +++ arch/s390/crypto/sha3_256_s390.c | 22 ++++++++++++++++------ arch/s390/crypto/sha3_512_s390.c | 23 ++++++++++++++++------- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index d757ccbce2b4..cadb4b13622a 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -27,6 +27,9 @@ struct s390_sha_ctx { u64 state[SHA512_DIGEST_SIZE / sizeof(u64)]; u64 count_hi; } sha512; + struct { + __le64 state[SHA3_STATE_SIZE / sizeof(u64)]; + } sha3; }; int func; /* KIMD function to use */ bool first_message_part; diff --git a/arch/s390/crypto/sha3_256_s390.c b/arch/s390/crypto/sha3_256_s390.c index 4a7731ac6bcd..03bb4f4bab70 100644 --- a/arch/s390/crypto/sha3_256_s390.c +++ b/arch/s390/crypto/sha3_256_s390.c @@ -35,23 +35,33 @@ static int sha3_256_init(struct shash_desc *desc) static int sha3_256_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha3_state *octx = out; + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int i; if (sctx->first_message_part) { - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->first_message_part = 0; + memset(out, 0, SHA3_STATE_SIZE); + return 0; } - memcpy(octx->st, sctx->state, sizeof(octx->st)); + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++); return 0; } static int sha3_256_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int i; + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++)); sctx->count = 0; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_256; diff --git a/arch/s390/crypto/sha3_512_s390.c b/arch/s390/crypto/sha3_512_s390.c index 018f02fff444..a5c9690eecb1 100644 --- a/arch/s390/crypto/sha3_512_s390.c +++ b/arch/s390/crypto/sha3_512_s390.c @@ -34,24 +34,33 @@ static int sha3_512_init(struct shash_desc *desc) static int sha3_512_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - struct sha3_state *octx = out; - + union { + u8 *u8; + u64 *u64; + } p = { .u8 = out }; + int i; if (sctx->first_message_part) { - memset(sctx->state, 0, sizeof(sctx->state)); - sctx->first_message_part = 0; + memset(out, 0, SHA3_STATE_SIZE); + return 0; } - memcpy(octx->st, sctx->state, sizeof(octx->st)); + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + put_unaligned(le64_to_cpu(sctx->sha3.state[i]), p.u64++); return 0; } static int sha3_512_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); - const struct sha3_state *ictx = in; + union { + const u8 *u8; + const u64 *u64; + } p = { .u8 = in }; + int i; + for (i = 0; i < SHA3_STATE_SIZE / 8; i++) + sctx->sha3.state[i] = cpu_to_le64(get_unaligned(p.u64++)); sctx->count = 0; - memcpy(sctx->state, ictx->st, sizeof(ictx->st)); sctx->first_message_part = 0; sctx->func = CPACF_KIMD_SHA3_512; From 71203f68c7749609d7fc8ae6ad054bdedeb24f91 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 24 May 2025 20:32:20 +0800 Subject: [PATCH 027/116] padata: Fix pd UAF once and for all There is a race condition/UAF in padata_reorder that goes back to the 
initial commit. A reference count is taken at the start of the process in padata_do_parallel, and released at the end in padata_serial_worker. This reference count is (and only is) required for padata_replace to function correctly. If padata_replace is never called then there is no issue. In the function padata_reorder which serves as the core of padata, as soon as padata is added to queue->serial.list, and the associated spin lock released, that padata may be processed and the reference count on pd would go away. Fix this by getting the next padata before the squeue->serial lock is released. In order to make this possible, simplify padata_reorder by only calling it once the next padata arrives. Fixes: 16295bec6398 ("padata: Generic parallelization/serialization interface") Signed-off-by: Herbert Xu --- include/linux/padata.h | 3 - kernel/padata.c | 132 ++++++++++++----------------------------- 2 files changed, 37 insertions(+), 98 deletions(-) diff --git a/include/linux/padata.h b/include/linux/padata.h index 0146daf34430..b486c7359de2 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -91,7 +91,6 @@ struct padata_cpumask { * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. * @reorder_work: work struct for reordering. - * @lock: Reorder lock. */ struct parallel_data { struct padata_shell *ps; @@ -102,8 +101,6 @@ struct parallel_data { unsigned int processed; int cpu; struct padata_cpumask cpumask; - struct work_struct reorder_work; - spinlock_t ____cacheline_aligned lock; }; /** diff --git a/kernel/padata.c b/kernel/padata.c index 7eee94166357..25cd3406477a 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -261,20 +261,17 @@ EXPORT_SYMBOL(padata_do_parallel); * be parallel processed by another cpu and is not yet present in * the cpu's reorder queue. */ -static struct padata_priv *padata_find_next(struct parallel_data *pd, - bool remove_object) +static struct padata_priv *padata_find_next(struct parallel_data *pd, int cpu, + unsigned int processed) { struct padata_priv *padata; struct padata_list *reorder; - int cpu = pd->cpu; reorder = per_cpu_ptr(pd->reorder_list, cpu); spin_lock(&reorder->lock); - if (list_empty(&reorder->list)) { - spin_unlock(&reorder->lock); - return NULL; - } + if (list_empty(&reorder->list)) + goto notfound; padata = list_entry(reorder->list.next, struct padata_priv, list); @@ -282,97 +279,52 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd, * Checks the rare case where two or more parallel jobs have hashed to * the same CPU and one of the later ones finishes first. */ - if (padata->seq_nr != pd->processed) { - spin_unlock(&reorder->lock); - return NULL; - } - - if (remove_object) { - list_del_init(&padata->list); - ++pd->processed; - pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu); - } + if (padata->seq_nr != processed) + goto notfound; + list_del_init(&padata->list); spin_unlock(&reorder->lock); return padata; + +notfound: + pd->processed = processed; + pd->cpu = cpu; + spin_unlock(&reorder->lock); + return NULL; } -static void padata_reorder(struct parallel_data *pd) +static void padata_reorder(struct padata_priv *padata) { + struct parallel_data *pd = padata->pd; struct padata_instance *pinst = pd->ps->pinst; - int cb_cpu; - struct padata_priv *padata; - struct padata_serial_queue *squeue; - struct padata_list *reorder; + unsigned int processed; + int cpu; - /* - * We need to ensure that only one cpu can work on dequeueing of - * the reorder queue the time. 
Calculating in which percpu reorder - * queue the next object will arrive takes some time. A spinlock - * would be highly contended. Also it is not clear in which order - * the objects arrive to the reorder queues. So a cpu could wait to - * get the lock just to notice that there is nothing to do at the - * moment. Therefore we use a trylock and let the holder of the lock - * care for all the objects enqueued during the holdtime of the lock. - */ - if (!spin_trylock_bh(&pd->lock)) - return; + processed = pd->processed; + cpu = pd->cpu; - while (1) { - padata = padata_find_next(pd, true); + do { + struct padata_serial_queue *squeue; + int cb_cpu; - /* - * If the next object that needs serialization is parallel - * processed by another cpu and is still on it's way to the - * cpu's reorder queue, nothing to do for now. - */ - if (!padata) - break; + cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu); + processed++; cb_cpu = padata->cb_cpu; squeue = per_cpu_ptr(pd->squeue, cb_cpu); spin_lock(&squeue->serial.lock); list_add_tail(&padata->list, &squeue->serial.list); - spin_unlock(&squeue->serial.lock); - queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work); - } - spin_unlock_bh(&pd->lock); - - /* - * The next object that needs serialization might have arrived to - * the reorder queues in the meantime. - * - * Ensure reorder queue is read after pd->lock is dropped so we see - * new objects from another task in padata_do_serial. Pairs with - * smp_mb in padata_do_serial. - */ - smp_mb(); - - reorder = per_cpu_ptr(pd->reorder_list, pd->cpu); - if (!list_empty(&reorder->list) && padata_find_next(pd, false)) { /* - * Other context(eg. the padata_serial_worker) can finish the request. - * To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish. + * If the next object that needs serialization is parallel + * processed by another cpu and is still on it's way to the + * cpu's reorder queue, end the loop. */ - padata_get_pd(pd); - if (!queue_work(pinst->serial_wq, &pd->reorder_work)) - padata_put_pd(pd); - } -} - -static void invoke_padata_reorder(struct work_struct *work) -{ - struct parallel_data *pd; - - local_bh_disable(); - pd = container_of(work, struct parallel_data, reorder_work); - padata_reorder(pd); - local_bh_enable(); - /* Pairs with putting the reorder_work in the serial_wq */ - padata_put_pd(pd); + padata = padata_find_next(pd, cpu, processed); + spin_unlock(&squeue->serial.lock); + } while (padata); } static void padata_serial_worker(struct work_struct *serial_work) @@ -423,6 +375,7 @@ void padata_do_serial(struct padata_priv *padata) struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu); struct padata_priv *cur; struct list_head *pos; + bool gotit = true; spin_lock(&reorder->lock); /* Sort in ascending order of sequence number. */ @@ -432,17 +385,14 @@ void padata_do_serial(struct padata_priv *padata) if ((signed int)(cur->seq_nr - padata->seq_nr) < 0) break; } - list_add(&padata->list, pos); + if (padata->seq_nr != pd->processed) { + gotit = false; + list_add(&padata->list, pos); + } spin_unlock(&reorder->lock); - /* - * Ensure the addition to the reorder list is ordered correctly - * with the trylock of pd->lock in padata_reorder. Pairs with smp_mb - * in padata_reorder. 
- */ - smp_mb(); - - padata_reorder(pd); + if (gotit) + padata_reorder(padata); } EXPORT_SYMBOL(padata_do_serial); @@ -632,9 +582,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_shell *ps) padata_init_squeues(pd); pd->seq_nr = -1; refcount_set(&pd->refcnt, 1); - spin_lock_init(&pd->lock); pd->cpu = cpumask_first(pd->cpumask.pcpu); - INIT_WORK(&pd->reorder_work, invoke_padata_reorder); return pd; @@ -1144,12 +1092,6 @@ void padata_free_shell(struct padata_shell *ps) if (!ps) return; - /* - * Wait for all _do_serial calls to finish to avoid touching - * freed pd's and ps's. - */ - synchronize_rcu(); - mutex_lock(&ps->pinst->lock); list_del(&ps->list); pd = rcu_dereference_protected(ps->pd, 1); From fbfe4f47d931dc36fccec66546599663e08f4943 Mon Sep 17 00:00:00 2001 From: Amit Singh Tomar Date: Wed, 28 May 2025 20:29:40 +0530 Subject: [PATCH 028/116] crypto: octeontx2 - Rework how engine group number is obtained By default, otx2_cpt_get_kcrypto_eng_grp_num() returns the engine group number of SE engine type. Add an engine type parameter to support retrieving the engine group number for different engine types. Since otx2_cpt_get_kcrypto_eng_grp_num() always returns the kernel crypto engine group number, rename it to otx2_cpt_get_eng_grp_num(). Signed-off-by: Amit Singh Tomar Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h | 3 ++- drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 2 +- drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c | 6 ++++-- drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c | 13 +++++++------ drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c | 5 ++++- .../crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c | 12 ++++++++++-- 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h index 90a031421aac..e64ca30335de 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -555,6 +555,7 @@ struct otx2_cptlf_wqe; int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, int cpu_num); void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe); -int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev); +int otx2_cpt_get_eng_grp_num(struct pci_dev *pdev, + enum otx2_cpt_eng_type); #endif /* __OTX2_CPT_REQMGR_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index 6e004a5568d8..49ec2b92e86d 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -124,7 +124,7 @@ struct otx2_cptlfs_info { struct cpt_hw_ops *ops; u8 are_lfs_attached; /* Whether CPT LFs are attached */ u8 lfs_num; /* Number of CPT LFs */ - u8 kcrypto_eng_grp_num; /* Kernel crypto engine group number */ + u8 kcrypto_se_eng_grp_num; /* Crypto symmetric engine group number */ u8 kvf_limits; /* Kernel crypto limits */ atomic_t state; /* LF's state. 
started/reset */ int blkaddr; /* CPT blkaddr: BLKADDR_CPT0/BLKADDR_CPT1 */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 7eb0bc13994d..8d9f394d6b50 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -384,7 +384,8 @@ static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) req_info->req_type = OTX2_CPT_ENC_DEC_REQ; req_info->is_enc = enc; req_info->is_trunc_hmac = false; - req_info->ctrl.s.grp = otx2_cpt_get_kcrypto_eng_grp_num(pdev); + req_info->ctrl.s.grp = otx2_cpt_get_eng_grp_num(pdev, + OTX2_CPT_SE_TYPES); req_info->req.cptr = ctx->er_ctx.hw_ctx; req_info->req.cptr_dma = ctx->er_ctx.cptr_dma; @@ -1288,7 +1289,8 @@ static int cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) if (status) return status; - req_info->ctrl.s.grp = otx2_cpt_get_kcrypto_eng_grp_num(pdev); + req_info->ctrl.s.grp = otx2_cpt_get_eng_grp_num(pdev, + OTX2_CPT_SE_TYPES); /* * We perform an asynchronous send and once diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 56904bdfd6e8..79adc224066e 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -265,17 +265,18 @@ static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) u8 eng_grp_msk; /* Get engine group number for symmetric crypto */ - cptvf->lfs.kcrypto_eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; + cptvf->lfs.kcrypto_se_eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; ret = otx2_cptvf_send_eng_grp_num_msg(cptvf, OTX2_CPT_SE_TYPES); if (ret) return ret; - if (cptvf->lfs.kcrypto_eng_grp_num == OTX2_CPT_INVALID_CRYPTO_ENG_GRP) { - dev_err(dev, "Engine group for kernel crypto not available\n"); - ret = -ENOENT; - return ret; + if (cptvf->lfs.kcrypto_se_eng_grp_num == + OTX2_CPT_INVALID_CRYPTO_ENG_GRP) { + dev_err(dev, + "Symmetric Engine group for crypto not available\n"); + return -ENOENT; } - eng_grp_msk = 1 << cptvf->lfs.kcrypto_eng_grp_num; + eng_grp_msk = 1 << cptvf->lfs.kcrypto_se_eng_grp_num; ret = otx2_cptvf_send_kvf_limits_msg(cptvf); if (ret) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c index 931b72580fd9..f36d75f40014 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c @@ -75,6 +75,7 @@ static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, struct otx2_cpt_caps_rsp *eng_caps; struct cpt_rd_wr_reg_msg *rsp_reg; struct msix_offset_rsp *rsp_msix; + u8 grp_num; int i; if (msg->id >= MBOX_MSG_MAX) { @@ -122,7 +123,9 @@ static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, break; case MBOX_MSG_GET_ENG_GRP_NUM: rsp_grp = (struct otx2_cpt_egrp_num_rsp *) msg; - cptvf->lfs.kcrypto_eng_grp_num = rsp_grp->eng_grp_num; + grp_num = rsp_grp->eng_grp_num; + if (rsp_grp->eng_type == OTX2_CPT_SE_TYPES) + cptvf->lfs.kcrypto_se_eng_grp_num = grp_num; break; case MBOX_MSG_GET_KVF_LIMITS: rsp_limits = (struct otx2_cpt_kvf_limits_rsp *) msg; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c index 426244107037..8b4ac269330a 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c @@ -391,9 +391,17 @@ void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe) 
&wqe->lfs->lf[wqe->lf_num].pqueue); } -int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev) +int otx2_cpt_get_eng_grp_num(struct pci_dev *pdev, + enum otx2_cpt_eng_type eng_type) { struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); - return cptvf->lfs.kcrypto_eng_grp_num; + switch (eng_type) { + case OTX2_CPT_SE_TYPES: + return cptvf->lfs.kcrypto_se_eng_grp_num; + default: + dev_err(&cptvf->pdev->dev, "Unsupported engine type"); + break; + } + return -ENXIO; } From 1b9209d57ac3c3e779d68da907505175b6b058b0 Mon Sep 17 00:00:00 2001 From: Amit Singh Tomar Date: Wed, 28 May 2025 20:29:41 +0530 Subject: [PATCH 029/116] crypto: octeontx2 - get engine group number for asymmetric engine Cryptographic Accelerator Unit (CPT) support different engine groups, one for asymmetric algorithms (only AE engines in this group), one for the most common symmetric algorithms (all SE and all IE engines in this group), and one for other symmetric algorithms (only SE engines in this group). For symmetric engine (SE), we obtain the group number using "MBOX_MSG_GET_ENG_GRP_NUM" mailbox. Let's follow a similar approach to determine the group number for asymmetric engine (AE). Signed-off-by: Amit Singh Tomar Signed-off-by: Herbert Xu --- drivers/crypto/marvell/octeontx2/otx2_cptlf.h | 1 + .../crypto/marvell/octeontx2/otx2_cptvf_main.c | 17 ++++++++++++++++- .../crypto/marvell/octeontx2/otx2_cptvf_mbox.c | 2 ++ .../marvell/octeontx2/otx2_cptvf_reqmgr.c | 2 ++ 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h index 49ec2b92e86d..1b9f75214d18 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -125,6 +125,7 @@ struct otx2_cptlfs_info { u8 are_lfs_attached; /* Whether CPT LFs are attached */ u8 lfs_num; /* Number of CPT LFs */ u8 kcrypto_se_eng_grp_num; /* Crypto symmetric engine group number */ + u8 kcrypto_ae_eng_grp_num; /* Crypto asymmetric engine group number */ u8 kvf_limits; /* Kernel crypto limits */ atomic_t state; /* LF's state. 
started/reset */ int blkaddr; /* CPT blkaddr: BLKADDR_CPT0/BLKADDR_CPT1 */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c index 79adc224066e..c1c44a7b89fa 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -276,7 +276,22 @@ static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) "Symmetric Engine group for crypto not available\n"); return -ENOENT; } - eng_grp_msk = 1 << cptvf->lfs.kcrypto_se_eng_grp_num; + + /* Get engine group number for asymmetric crypto */ + cptvf->lfs.kcrypto_ae_eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; + ret = otx2_cptvf_send_eng_grp_num_msg(cptvf, OTX2_CPT_AE_TYPES); + if (ret) + return ret; + + if (cptvf->lfs.kcrypto_ae_eng_grp_num == + OTX2_CPT_INVALID_CRYPTO_ENG_GRP) { + dev_err(dev, + "Asymmetric Engine group for crypto not available\n"); + return -ENOENT; + } + + eng_grp_msk = BIT(cptvf->lfs.kcrypto_se_eng_grp_num) | + BIT(cptvf->lfs.kcrypto_ae_eng_grp_num); ret = otx2_cptvf_send_kvf_limits_msg(cptvf); if (ret) diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c index f36d75f40014..3078e2375d3b 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c @@ -126,6 +126,8 @@ static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, grp_num = rsp_grp->eng_grp_num; if (rsp_grp->eng_type == OTX2_CPT_SE_TYPES) cptvf->lfs.kcrypto_se_eng_grp_num = grp_num; + else if (rsp_grp->eng_type == OTX2_CPT_AE_TYPES) + cptvf->lfs.kcrypto_ae_eng_grp_num = grp_num; break; case MBOX_MSG_GET_KVF_LIMITS: rsp_limits = (struct otx2_cpt_kvf_limits_rsp *) msg; diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c index 8b4ac269330a..e71494486c64 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c @@ -399,6 +399,8 @@ int otx2_cpt_get_eng_grp_num(struct pci_dev *pdev, switch (eng_type) { case OTX2_CPT_SE_TYPES: return cptvf->lfs.kcrypto_se_eng_grp_num; + case OTX2_CPT_AE_TYPES: + return cptvf->lfs.kcrypto_ae_eng_grp_num; default: dev_err(&cptvf->pdev->dev, "Unsupported engine type"); break; From 0fa766726c091ff0ec7d26874f6e4724d23ecb0e Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Wed, 28 May 2025 20:20:18 +0000 Subject: [PATCH 030/116] crypto: ccp - Fix dereferencing uninitialized error pointer Fix below smatch warnings: drivers/crypto/ccp/sev-dev.c:1312 __sev_platform_init_locked() error: we previously assumed 'error' could be null Fixes: 9770b428b1a2 ("crypto: ccp - Move dev_info/err messages for SEV/SNP init and shutdown") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202505071746.eWOx5QgC-lkp@intel.com/ Signed-off-by: Ashish Kalra Reviewed-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 3451bada884e..8fb94c5f006a 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1276,9 +1276,11 @@ static int __sev_platform_init_handle_init_ex_path(struct sev_device *sev) static int __sev_platform_init_locked(int *error) { - int rc, psp_ret = SEV_RET_NO_FW_CALL; + int rc, psp_ret, dfflush_error; struct sev_device *sev; + psp_ret = dfflush_error = SEV_RET_NO_FW_CALL; + if 
(!psp_master || !psp_master->sev_data) return -ENODEV; @@ -1320,10 +1322,10 @@ static int __sev_platform_init_locked(int *error) /* Prepare for first SEV guest launch after INIT */ wbinvd_on_all_cpus(); - rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error); + rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, &dfflush_error); if (rc) { dev_err(sev->dev, "SEV: DF_FLUSH failed %#x, rc %d\n", - *error, rc); + dfflush_error, rc); return rc; } From 53669ff591d4deb2d80eed4c07593ad0c0b45899 Mon Sep 17 00:00:00 2001 From: Ahsan Atta Date: Wed, 4 Jun 2025 09:23:43 +0100 Subject: [PATCH 031/116] crypto: qat - allow enabling VFs in the absence of IOMMU The commit ca88a2bdd4dd ("crypto: qat - allow disabling SR-IOV VFs") introduced an unnecessary change that prevented enabling SR-IOV when the IOMMU is disabled. In certain scenarios, it is desirable to enable SR-IOV even in the absence of an IOMMU. Thus, restore the previous functionality so that VFs can be enumerated in the absence of an IOMMU. Fixes: ca88a2bdd4dd ("crypto: qat - allow disabling SR-IOV VFs") Signed-off-by: Ahsan Atta Reviewed-by: Giovanni Cabiddu Reviewed-by: Michal Witwicki Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_sriov.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c index c75d0b6cb0ad..31d1ef0cb1f5 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c @@ -155,7 +155,6 @@ static int adf_do_enable_sriov(struct adf_accel_dev *accel_dev) if (!device_iommu_mapped(&GET_DEV(accel_dev))) { dev_warn(&GET_DEV(accel_dev), "IOMMU should be enabled for SR-IOV to work correctly\n"); - return -EINVAL; } if (adf_dev_started(accel_dev)) { From 254923ca8715f623704378266815b6d14eb26194 Mon Sep 17 00:00:00 2001 From: Svyatoslav Pankratov Date: Wed, 4 Jun 2025 16:59:56 +0100 Subject: [PATCH 032/116] crypto: qat - fix state restore for banks with exceptions Change the logic in the restore function to properly handle bank exceptions. The check for exceptions in the saved state should be performed before conducting any other ringstat register checks. If a bank was saved with an exception, the ringstat will have the appropriate rp_halt/rp_exception bits set, causing the driver to exit the restore process with an error. Instead, the restore routine should first check the ringexpstat register, and if any exception was raised, it should stop further checks and return without any error. In other words, if a ring pair is in an exception state at the source, it should be restored the same way at the destination but without raising an error. Even though this approach might lead to losing the exception state during migration, the driver will log the exception from the saved state during the restore process.
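The reordering is easier to see in isolation. A condensed sketch of the new control flow, using the names from the hunk below rather than the literal driver code:

    /* An exception recorded in the *saved* state is expected:
     * log it and report a successful, best-effort restore.
     */
    if (state->ringexpstat) {
        pr_info("QAT: Bank %u state not fully restored due to exception in saved state (%#x)\n",
                bank, state->ringexpstat);
        return 0;
    }

    /* An exception raised during the restore itself is a real failure. */
    if (ops->read_csr_exp_stat(base, bank))
        return -EFAULT;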
Signed-off-by: Svyatoslav Pankratov Fixes: bbfdde7d195f ("crypto: qat - add bank save and restore flows") Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_common/adf_gen4_hw_data.c | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 0406cb09c5bb..14d0fdd66a4b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -581,6 +581,28 @@ static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0); ops->write_csr_exp_int_en(base, bank, state->ringexpintenable); ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl); + + /* + * Verify whether any exceptions were raised during the bank save process. + * If exceptions occurred, the status and exception registers cannot + * be directly restored. Consequently, further restoration is not + * feasible, and the current state of the ring should be maintained. + */ + val = state->ringexpstat; + if (val) { + pr_info("QAT: Bank %u state not fully restored due to exception in saved state (%#x)\n", + bank, val); + return 0; + } + + /* Ensure that the restoration process completed without exceptions */ + tmp_val = ops->read_csr_exp_stat(base, bank); + if (tmp_val) { + pr_err("QAT: Bank %u restored with exception: %#x\n", + bank, tmp_val); + return -EFAULT; + } + ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben); /* Check that all ring statuses match the saved state. */ @@ -614,13 +636,6 @@ static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, if (ret) return ret; - tmp_val = ops->read_csr_exp_stat(base, bank); - val = state->ringexpstat; - if (tmp_val && !val) { - pr_err("QAT: Bank was restored with exception: 0x%x\n", val); - return -EINVAL; - } - return 0; } From ea87e6c40a79de84cc2c6186eafb476be79916e0 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Wed, 4 Jun 2025 16:47:40 -0400 Subject: [PATCH 033/116] crypto: pcrypt - Optimize pcrypt_aead_init_tfm() The function open-codes cpumask_nth(). The dedicated helper is faster than the open-coded for-loop.
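For reference, the two equivalent forms side by side; this is a minimal standalone sketch, not the pcrypt code itself:

    #include <linux/cpumask.h>

    /* Open-coded: step cpu_index times from the first online CPU. */
    static unsigned int nth_online_cpu_open_coded(unsigned int cpu_index)
    {
        unsigned int i, cpu = cpumask_first(cpu_online_mask);

        for (i = 0; i < cpu_index; i++)
            cpu = cpumask_next(cpu, cpu_online_mask);
        return cpu;
    }

    /* Equivalent single call: return the cpu_index-th set CPU. */
    static unsigned int nth_online_cpu(unsigned int cpu_index)
    {
        return cpumask_nth(cpu_index, cpu_online_mask);
    }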
Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Herbert Xu --- crypto/pcrypt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index c33d29a523e0..c3a9d4f2995c 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -178,7 +178,7 @@ static int pcrypt_aead_decrypt(struct aead_request *req) static int pcrypt_aead_init_tfm(struct crypto_aead *tfm) { - int cpu, cpu_index; + int cpu_index; struct aead_instance *inst = aead_alg_instance(tfm); struct pcrypt_instance_ctx *ictx = aead_instance_ctx(inst); struct pcrypt_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -187,10 +187,7 @@ static int pcrypt_aead_init_tfm(struct crypto_aead *tfm) cpu_index = (unsigned int)atomic_inc_return(&ictx->tfm_count) % cpumask_weight(cpu_online_mask); - ctx->cb_cpu = cpumask_first(cpu_online_mask); - for (cpu = 0; cpu < cpu_index; cpu++) - ctx->cb_cpu = cpumask_next(ctx->cb_cpu, cpu_online_mask); - + ctx->cb_cpu = cpumask_nth(cpu_index, cpu_online_mask); cipher = crypto_spawn_aead(&ictx->spawn); if (IS_ERR(cipher)) From 4b7ed1ce411e5049b3c842009981c9c5191fea49 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Wed, 4 Jun 2025 16:47:41 -0400 Subject: [PATCH 034/116] crypto: caam - Fix opencoded cpumask_next_wrap() in caam_drv_ctx_init() The dedicated cpumask_next_wrap() is less verbose and better optimized than cpumask_next() followed by cpumask_first(). Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Herbert Xu --- drivers/crypto/caam/qi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c index b6e7c0b29d4e..1e731ed8702b 100644 --- a/drivers/crypto/caam/qi.c +++ b/drivers/crypto/caam/qi.c @@ -442,11 +442,8 @@ struct caam_drv_ctx *caam_drv_ctx_init(struct device *qidev, if (!cpumask_test_cpu(*cpu, cpus)) { int *pcpu = &get_cpu_var(last_cpu); - *pcpu = cpumask_next(*pcpu, cpus); - if (*pcpu >= nr_cpu_ids) - *pcpu = cpumask_first(cpus); + *pcpu = cpumask_next_wrap(*pcpu, cpus); *cpu = *pcpu; - put_cpu_var(last_cpu); } drv_ctx->cpu = *cpu; From 758f5bdf1bef2c3820de38283bc35cf668b85f9c Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Wed, 4 Jun 2025 19:58:01 -0400 Subject: [PATCH 035/116] padata: use cpumask_nth() padata_do_parallel() and padata_index_to_cpu() duplicate cpumask_nth(). Fix both and use the generic helper.
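The caam patch above removes the same kind of open-coded walk, only with wrap-around semantics. A minimal sketch of that before/after, assuming the two-argument cpumask_next_wrap() form used in the hunk:

    /* Open-coded wrap-around advance (the removed pattern). */
    static unsigned int advance_cpu_open_coded(unsigned int prev,
                                               const struct cpumask *cpus)
    {
        unsigned int next = cpumask_next(prev, cpus);

        return next < nr_cpu_ids ? next : cpumask_first(cpus);
    }

    /* Equivalent single call. */
    static unsigned int advance_cpu(unsigned int prev,
                                    const struct cpumask *cpus)
    {
        return cpumask_next_wrap(prev, cpus);
    }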
Signed-off-by: Yury Norov Signed-off-by: Herbert Xu --- kernel/padata.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 25cd3406477a..f85f8bd788d0 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -63,17 +63,6 @@ static inline void padata_put_pd(struct parallel_data *pd) padata_put_pd_cnt(pd, 1); } -static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) -{ - int cpu, target_cpu; - - target_cpu = cpumask_first(pd->cpumask.pcpu); - for (cpu = 0; cpu < cpu_index; cpu++) - target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu); - - return target_cpu; -} - static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr) { /* @@ -82,7 +71,7 @@ static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr) */ int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu); - return padata_index_to_cpu(pd, cpu_index); + return cpumask_nth(cpu_index, pd->cpumask.pcpu); } static struct padata_work *padata_work_alloc(void) @@ -192,9 +181,9 @@ int padata_do_parallel(struct padata_shell *ps, struct padata_priv *padata, int *cb_cpu) { struct padata_instance *pinst = ps->pinst; - int i, cpu, cpu_index, err; struct parallel_data *pd; struct padata_work *pw; + int cpu_index, err; rcu_read_lock_bh(); @@ -210,12 +199,7 @@ int padata_do_parallel(struct padata_shell *ps, /* Select an alternate fallback CPU and notify the caller. */ cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu); - - cpu = cpumask_first(pd->cpumask.cbcpu); - for (i = 0; i < cpu_index; i++) - cpu = cpumask_next(cpu, pd->cpumask.cbcpu); - - *cb_cpu = cpu; + *cb_cpu = cpumask_nth(cpu_index, pd->cpumask.cbcpu); } err = -EBUSY; From 8f2e1a3cd78852f3a5ceef3367a0ce942928cee7 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 5 Jun 2025 12:25:26 +0100 Subject: [PATCH 036/116] crypto: qat - add support for decompression service to GEN6 devices Add support to configure decompression as a separate service for QAT GEN6 devices. A new arbiter configuration has been added to map the hardware decompression threads to all ring pairs. The decompression service is enabled via sysfs by writing "decomp" to "/sys/bus/pci/devices//qat/cfg_services". The decompression service is not supported on QAT GEN2 and GEN4 devices, and attempting it results in an invalid write error. The existing compression service for QAT GEN2 and GEN4 devices remains unchanged and supports both compression and decompression operations on the same ring pair. 
Co-developed-by: Karthikeyan Gopal Signed-off-by: Karthikeyan Gopal Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 13 ++++++++++++- .../crypto/intel/qat/qat_common/adf_cfg_common.h | 1 + .../crypto/intel/qat/qat_common/adf_cfg_services.c | 5 +++++ .../crypto/intel/qat/qat_common/adf_cfg_services.h | 1 + .../crypto/intel/qat/qat_common/adf_cfg_strings.h | 1 + .../crypto/intel/qat/qat_common/adf_gen4_hw_data.c | 3 +++ drivers/crypto/intel/qat/qat_common/adf_sysfs.c | 2 ++ 7 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index 359a6447ccb8..48a29a102dd0 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -76,6 +76,10 @@ static const unsigned long thrd_mask_dcc[ADF_6XXX_MAX_ACCELENGINES] = { 0x00, 0x00, 0x00, 0x00, 0x07, 0x07, 0x03, 0x03, 0x00 }; +static const unsigned long thrd_mask_dcpr[ADF_6XXX_MAX_ACCELENGINES] = { + 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00 +}; + static const char *const adf_6xxx_fw_objs[] = { [ADF_FW_CY_OBJ] = ADF_6XXX_CY_OBJ, [ADF_FW_DC_OBJ] = ADF_6XXX_DC_OBJ, @@ -126,6 +130,9 @@ static int get_service(unsigned long *mask) if (test_and_clear_bit(SVC_DCC, mask)) return SVC_DCC; + if (test_and_clear_bit(SVC_DECOMP, mask)) + return SVC_DECOMP; + return -EINVAL; } @@ -139,6 +146,8 @@ static enum adf_cfg_service_type get_ring_type(enum adf_services service) case SVC_DC: case SVC_DCC: return COMP; + case SVC_DECOMP: + return DECOMP; default: return UNUSED; } @@ -155,6 +164,8 @@ static const unsigned long *get_thrd_mask(enum adf_services service) return thrd_mask_cpr; case SVC_DCC: return thrd_mask_dcc; + case SVC_DECOMP: + return thrd_mask_dcpr; default: return NULL; } @@ -648,7 +659,7 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) caps |= capabilities_asym; if (test_bit(SVC_SYM, &mask)) caps |= capabilities_sym; - if (test_bit(SVC_DC, &mask)) + if (test_bit(SVC_DC, &mask) || test_bit(SVC_DECOMP, &mask)) caps |= capabilities_dc; if (test_bit(SVC_DCC, &mask)) { /* diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h index 15fdf9854b81..81e9e9d7eccd 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h @@ -29,6 +29,7 @@ enum adf_cfg_service_type { COMP, SYM, ASYM, + DECOMP, USED }; diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c index c39871291da7..f49227b10064 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c @@ -15,6 +15,7 @@ static const char *const adf_cfg_services[] = { [SVC_SYM] = ADF_CFG_SYM, [SVC_DC] = ADF_CFG_DC, [SVC_DCC] = ADF_CFG_DCC, + [SVC_DECOMP] = ADF_CFG_DECOMP, }; /* @@ -43,6 +44,7 @@ static_assert(BITS_PER_LONG >= SVC_BASE_COUNT); static_assert(sizeof(ADF_CFG_SYM ADF_SERVICES_DELIMITER ADF_CFG_ASYM ADF_SERVICES_DELIMITER ADF_CFG_DC ADF_SERVICES_DELIMITER + ADF_CFG_DECOMP ADF_SERVICES_DELIMITER ADF_CFG_DCC) < ADF_CFG_MAX_VAL_LEN_IN_BYTES); static int adf_service_string_to_mask(struct adf_accel_dev *accel_dev, const char *buf, @@ -167,6 +169,9 @@ int adf_get_service_enabled(struct adf_accel_dev *accel_dev) if (test_bit(SVC_DC, &mask)) return SVC_DC; + 
if (test_bit(SVC_DECOMP, &mask)) + return SVC_DECOMP; + if (test_bit(SVC_DCC, &mask)) return SVC_DCC; diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h index 3742c450878f..8709b7a52907 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h @@ -11,6 +11,7 @@ enum adf_services { SVC_ASYM = 0, SVC_SYM, SVC_DC, + SVC_DECOMP, SVC_DCC, SVC_BASE_COUNT }; diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h index b79982c4a856..30107a02ee7f 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h @@ -24,6 +24,7 @@ #define ADF_CY "Cy" #define ADF_DC "Dc" #define ADF_CFG_DC "dc" +#define ADF_CFG_DECOMP "decomp" #define ADF_CFG_CY "sym;asym" #define ADF_CFG_SYM "sym" #define ADF_CFG_ASYM "asym" diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 14d0fdd66a4b..258adc0b49e0 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -262,6 +262,9 @@ bool adf_gen4_services_supported(unsigned long mask) if (mask >= BIT(SVC_BASE_COUNT)) return false; + if (test_bit(SVC_DECOMP, &mask)) + return false; + switch (num_svc) { case ADF_ONE_SERVICE: return true; diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c index 6c39194647f0..79c63dfa8ff3 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c @@ -269,6 +269,8 @@ static ssize_t rp2srv_show(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%s\n", ADF_CFG_SYM); case ASYM: return sysfs_emit(buf, "%s\n", ADF_CFG_ASYM); + case DECOMP: + return sysfs_emit(buf, "%s\n", ADF_CFG_DECOMP); default: break; } From 1029436218e50168812dbc44b16bca6d35721b0b Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 5 Jun 2025 12:25:27 +0100 Subject: [PATCH 037/116] Documentation: qat: update sysfs-driver-qat for GEN6 devices Reorganize and expand documentation for service configurations. This reworks the `cfg_services` section by removing explicit service combinations (e.g., asym;sym, sym;asym) and clarifying that multiple services can be configured in any order. Update the documentation to reflect that the attribute previously limited to qat_4xxx devices is also applicable to qat_6xxx devices. Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Reviewed-by: Fiona Trahe Signed-off-by: Herbert Xu --- Documentation/ABI/testing/sysfs-driver-qat | 50 ++++++++++++---------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-qat b/Documentation/ABI/testing/sysfs-driver-qat index f290e77cd590..b0561b9fc4eb 100644 --- a/Documentation/ABI/testing/sysfs-driver-qat +++ b/Documentation/ABI/testing/sysfs-driver-qat @@ -14,7 +14,7 @@ Description: (RW) Reports the current state of the QAT device. Write to It is possible to transition the device from up to down only if the device is up and vice versa. - This attribute is only available for qat_4xxx devices. + This attribute is available for qat_4xxx and qat_6xxx devices. 
What: /sys/bus/pci/devices//qat/cfg_services Date: June 2022 @@ -23,24 +23,28 @@ Contact: qat-linux@intel.com Description: (RW) Reports the current configuration of the QAT device. Write to the file to change the configured services. - The values are: + One or more services can be enabled per device. + Certain configurations are restricted to specific device types; + where applicable, this is explicitly indicated, for example + (qat_6xxx) denotes applicability exclusively to that device series. - * sym;asym: the device is configured for running crypto - services - * asym;sym: identical to sym;asym - * dc: the device is configured for running compression services - * dcc: identical to dc but enables the dc chaining feature, - hash then compression. If this is not required chose dc - * sym: the device is configured for running symmetric crypto - services - * asym: the device is configured for running asymmetric crypto - services - * asym;dc: the device is configured for running asymmetric - crypto services and compression services - * dc;asym: identical to asym;dc - * sym;dc: the device is configured for running symmetric crypto - services and compression services - * dc;sym: identical to sym;dc + The available services include: + + * sym: Configures the device for symmetric cryptographic operations. + * asym: Configures the device for asymmetric cryptographic operations. + * dc: Configures the device for compression and decompression + operations. + * dcc: Similar to dc, but with the additional dc chaining feature + enabled: cipher then compress (qat_6xxx), hash then compression. + If this is not required, choose dc. + * decomp: Configures the device for decompression operations (qat_6xxx). + + Service combinations are permitted for all services except dcc. + On QAT GEN4 devices (qat_4xxx driver) a maximum of two services can be + combined and on QAT GEN6 devices (qat_6xxx driver) a maximum of three + services can be combined. + The order of services is not significant. For instance, sym;asym is + functionally equivalent to asym;sym. It is possible to set the configuration only if the device is in the `down` state (see /sys/bus/pci/devices//qat/state) @@ -59,7 +63,7 @@ Description: (RW) Reports the current configuration of the QAT device. # cat /sys/bus/pci/devices//qat/cfg_services dc - This attribute is only available for qat_4xxx devices. + This attribute is available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat/pm_idle_enabled Date: June 2023 @@ -94,7 +98,7 @@ Description: (RW) This configuration option provides a way to force the device i # cat /sys/bus/pci/devices//qat/pm_idle_enabled 0 - This attribute is only available for qat_4xxx devices. + This attribute is available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat/rp2srv Date: January 2024 @@ -126,7 +130,7 @@ Description: # cat /sys/bus/pci/devices//qat/rp2srv sym - This attribute is only available for qat_4xxx devices. + This attribute is available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat/num_rps Date: January 2024 @@ -140,7 +144,7 @@ Description: # cat /sys/bus/pci/devices//qat/num_rps 64 - This attribute is only available for qat_4xxx devices. + This attribute is available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat/auto_reset Date: May 2024 @@ -160,4 +164,4 @@ Description: (RW) Reports the current state of the autoreset feature * 0/Nn/off: auto reset disabled. If the device encounters an unrecoverable error, it will not be reset.
- This attribute is only available for qat_4xxx devices. + This attribute is available for qat_4xxx and qat_6xxx devices. From 82a0302e7167d0b7c6cde56613db3748f8dd806d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Jun 2025 16:38:49 +0800 Subject: [PATCH 038/116] padata: Remove comment for reorder_work Remove the comment for reorder_work, which no longer exists. Reported-by: Stephen Rothwell Fixes: 71203f68c774 ("padata: Fix pd UAF once and for all") Signed-off-by: Herbert Xu --- include/linux/padata.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/padata.h b/include/linux/padata.h index b486c7359de2..765f2778e264 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -90,7 +90,6 @@ struct padata_cpumask { * @processed: Number of already processed objects. * @cpu: Next CPU to be processed. * @cpumask: The cpumasks in use for parallel and serial workers. - * @reorder_work: work struct for reordering. */ struct parallel_data { struct padata_shell *ps; From d5fa96dc5590915f060fee3209143313e4f5b03b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 10 Jun 2025 11:32:52 +0200 Subject: [PATCH 039/116] crypto: arm/aes-neonbs - work around gcc-15 warning I get a very rare -Wstringop-overread warning with gcc-15 for one function in aesbs_ctr_encrypt(): arch/arm/crypto/aes-neonbs-glue.c: In function 'ctr_encrypt': arch/arm/crypto/aes-neonbs-glue.c:212:1446: error: '__builtin_memcpy' offset [17, 2147483647] is out of the bounds [0, 16] of object 'buf' with type 'u8[16]' {aka 'unsigned char[16]'} [-Werror=array-bounds=] 212 | src = dst = memcpy(buf + sizeof(buf) - bytes, arch/arm/crypto/aes-neonbs-glue.c: In function 'ctr_encrypt': arch/arm/crypto/aes-neonbs-glue.c:218:17: error: 'aesbs_ctr_encrypt' reading 1 byte from a region of size 0 [-Werror=stringop-overread] 218 | aesbs_ctr_encrypt(dst, src, ctx->rk, ctx->rounds, bytes, walk.iv); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ arch/arm/crypto/aes-neonbs-glue.c:218:17: note: referencing argument 2 of type 'const u8[0]' {aka 'const unsigned char[]'} arch/arm/crypto/aes-neonbs-glue.c:218:17: note: referencing argument 3 of type 'const u8[0]' {aka 'const unsigned char[]'} arch/arm/crypto/aes-neonbs-glue.c:218:17: note: referencing argument 6 of type 'u8[0]' {aka 'unsigned char[]'} arch/arm/crypto/aes-neonbs-glue.c:36:17: note: in a call to function 'aesbs_ctr_encrypt' 36 | asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], This could happen in theory if walk.nbytes is larger than INT_MAX and gets converted to a negative local variable. Keep the type unsigned like the original nbytes to be sure there is no integer overflow.
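The underlying failure mode is a plain integer conversion. A standalone userspace illustration with a hypothetical value, assuming a 32-bit int (this is not taken from the driver sources):

    #include <stdio.h>

    int main(void)
    {
        unsigned int nbytes = 0x80000010u; /* hypothetical value > INT_MAX */
        int bytes = nbytes;                /* converts to a negative value */

        /* With a negative 'bytes', a 'bytes < 16' check passes, and
         * 'buf + sizeof(buf) - bytes' points far beyond 'buf' -- the
         * out-of-bounds access that gcc-15 warns about.
         */
        printf("nbytes = %u -> bytes = %d\n", nbytes, bytes);
        return 0;
    }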
Fixes: c8bf850e991a ("crypto: arm/aes-neonbs-ctr - deal with non-multiples of AES block size") Signed-off-by: Arnd Bergmann Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/arm/crypto/aes-neonbs-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index c60104dc1585..df5afe601e4a 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -206,7 +206,7 @@ static int ctr_encrypt(struct skcipher_request *req) while (walk.nbytes > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - int bytes = walk.nbytes; + unsigned int bytes = walk.nbytes; if (unlikely(bytes < AES_BLOCK_SIZE)) src = dst = memcpy(buf + sizeof(buf) - bytes, From d2b23a8dd88768cdfda504ca17a4984c5ae00693 Mon Sep 17 00:00:00 2001 From: ChengZhenghan Date: Wed, 11 Jun 2025 10:51:31 +0800 Subject: [PATCH 040/116] crypto: x86 - Fix build warnings about export.h I got some build warnings with W=1: arch/x86/coco/sev/core.c: arch/x86/crypto/aria_aesni_avx2_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/aria_aesni_avx_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/camellia_aesni_avx_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/camellia_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/curve25519-x86_64.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/serpent_avx_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/sm4_aesni_avx_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/twofish_glue.c: warning: EXPORT_SYMBOL() is used, but #include is missing arch/x86/crypto/twofish_glue_3way.c: warning: EXPORT_SYMBOL() is used, but #include is missing so I fixed these build warnings for x86_64. 
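The missing include in each case is linux/export.h, and the fix is mechanical: every translation unit that uses EXPORT_SYMBOL() must now include that header directly instead of inheriting it. A minimal sketch with a hypothetical symbol:

    #include <linux/export.h> /* now required explicitly for EXPORT_SYMBOL() */

    int demo_helper(void) /* hypothetical exported helper */
    {
        return 0;
    }
    EXPORT_SYMBOL(demo_helper); /* without export.h, W=1 builds warn here */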
Signed-off-by: ChengZhenghan Signed-off-by: Herbert Xu --- arch/x86/crypto/aria_aesni_avx2_glue.c | 1 + arch/x86/crypto/aria_aesni_avx_glue.c | 1 + arch/x86/crypto/camellia_aesni_avx_glue.c | 1 + arch/x86/crypto/camellia_glue.c | 1 + arch/x86/crypto/curve25519-x86_64.c | 1 + arch/x86/crypto/serpent_avx_glue.c | 1 + arch/x86/crypto/sm4_aesni_avx_glue.c | 1 + arch/x86/crypto/twofish_glue.c | 1 + arch/x86/crypto/twofish_glue_3way.c | 1 + 9 files changed, 9 insertions(+) diff --git a/arch/x86/crypto/aria_aesni_avx2_glue.c b/arch/x86/crypto/aria_aesni_avx2_glue.c index b4bddcd58457..007b250f774c 100644 --- a/arch/x86/crypto/aria_aesni_avx2_glue.c +++ b/arch/x86/crypto/aria_aesni_avx2_glue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/crypto/aria_aesni_avx_glue.c b/arch/x86/crypto/aria_aesni_avx_glue.c index ab9b38d05332..4c88ef4eba82 100644 --- a/arch/x86/crypto/aria_aesni_avx_glue.c +++ b/arch/x86/crypto/aria_aesni_avx_glue.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index a7d162388142..5c321f255eb7 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c index 3bd37d664121..cbede120e5f2 100644 --- a/arch/x86/crypto/camellia_glue.c +++ b/arch/x86/crypto/camellia_glue.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c index dcfc0de333de..d587f05c3c8c 100644 --- a/arch/x86/crypto/curve25519-x86_64.c +++ b/arch/x86/crypto/curve25519-x86_64.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index e640abc1cb8a..9c8b3a335d5c 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/crypto/sm4_aesni_avx_glue.c b/arch/x86/crypto/sm4_aesni_avx_glue.c index 72867fc49ce8..88caf418a06f 100644 --- a/arch/x86/crypto/sm4_aesni_avx_glue.c +++ b/arch/x86/crypto/sm4_aesni_avx_glue.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index 4c67184dc573..8e9906d36902 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c index 1a1ecfa7f72a..8ad77725bf60 100644 --- a/arch/x86/crypto/twofish_glue_3way.c +++ b/arch/x86/crypto/twofish_glue_3way.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include From 5ffc47feddcf8eb4d8ac7b42111a02c8e8146512 Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Wed, 11 Jun 2025 11:38:08 +0000 Subject: [PATCH 041/116] crypto: caam - Prevent crash on suspend with iMX8QM / iMX8ULP Since the CAAM on these SoCs is managed by another ARM core, called the SECO (Security Controller) on iMX8QM and Secure Enclave on iMX8ULP, which also reserves access to register page 0 suspend operations cannot touch this page. 
This is similar to when running OPTEE, where OPTEE will reserve page 0. Track this situation using a new state variable no_page0, reflecting if page 0 is reserved elsewhere, either by other management cores in SoC or by OPTEE. Replace the optee_en check in suspend/resume with the new check. optee_en cannot go away as it's needed elsewhere to gate OPTEE specific situations. Fixes the following splat at suspend: Internal error: synchronous external abort: 0000000096000010 [#1] SMP Hardware name: Freescale i.MX8QXP ACU6C (DT) pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : readl+0x0/0x18 lr : rd_reg32+0x18/0x3c sp : ffffffc08192ba20 x29: ffffffc08192ba20 x28: ffffff8025190000 x27: 0000000000000000 x26: ffffffc0808ae808 x25: ffffffc080922338 x24: ffffff8020e89090 x23: 0000000000000000 x22: ffffffc080922000 x21: ffffff8020e89010 x20: ffffffc080387ef8 x19: ffffff8020e89010 x18: 000000005d8000d5 x17: 0000000030f35963 x16: 000000008f785f3f x15: 000000003b8ef57c x14: 00000000c418aef8 x13: 00000000f5fea526 x12: 0000000000000001 x11: 0000000000000002 x10: 0000000000000001 x9 : 0000000000000000 x8 : ffffff8025190870 x7 : ffffff8021726880 x6 : 0000000000000002 x5 : ffffff80217268f0 x4 : ffffff8021726880 x3 : ffffffc081200000 x2 : 0000000000000001 x1 : ffffff8020e89010 x0 : ffffffc081200004 Call trace: readl+0x0/0x18 caam_ctrl_suspend+0x30/0xdc dpm_run_callback.constprop.0+0x24/0x5c device_suspend+0x170/0x2e8 dpm_suspend+0xa0/0x104 dpm_suspend_start+0x48/0x50 suspend_devices_and_enter+0x7c/0x45c pm_suspend+0x148/0x160 state_store+0xb4/0xf8 kobj_attr_store+0x14/0x24 sysfs_kf_write+0x38/0x48 kernfs_fop_write_iter+0xb4/0x178 vfs_write+0x118/0x178 ksys_write+0x6c/0xd0 __arm64_sys_write+0x14/0x1c invoke_syscall.constprop.0+0x64/0xb0 do_el0_svc+0x90/0xb0 el0_svc+0x18/0x44 el0t_64_sync_handler+0x88/0x124 el0t_64_sync+0x150/0x154 Code: 88dffc21 88dffc21 5ac00800 d65f03c0 (b9400000) Fixes: d2835701d93c ("crypto: caam - i.MX8ULP donot have CAAM page0 access") Cc: stable@kernel.org # v6.10+ Signed-off-by: John Ernberg Reviewed-by: Peng Fan Reviewed-by: Frank Li Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 5 +++-- drivers/crypto/caam/intern.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 38ff931059b4..766c447c9cfb 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -831,7 +831,7 @@ static int caam_ctrl_suspend(struct device *dev) { const struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev); - if (ctrlpriv->caam_off_during_pm && !ctrlpriv->optee_en) + if (ctrlpriv->caam_off_during_pm && !ctrlpriv->no_page0) caam_state_save(dev); return 0; @@ -842,7 +842,7 @@ static int caam_ctrl_resume(struct device *dev) struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev); int ret = 0; - if (ctrlpriv->caam_off_during_pm && !ctrlpriv->optee_en) { + if (ctrlpriv->caam_off_during_pm && !ctrlpriv->no_page0) { caam_state_restore(dev); /* HW and rng will be reset so deinstantiation can be removed */ @@ -908,6 +908,7 @@ static int caam_probe(struct platform_device *pdev) imx_soc_data = imx_soc_match->data; reg_access = reg_access && imx_soc_data->page0_access; + ctrlpriv->no_page0 = !reg_access; /* * CAAM clocks cannot be controlled from kernel. 
*/ diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index e51320150872..51c90d17a40d 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -115,6 +115,7 @@ struct caam_drv_private { u8 blob_present; /* Nonzero if BLOB support present in device */ u8 mc_en; /* Nonzero if MC f/w is active */ u8 optee_en; /* Nonzero if OP-TEE f/w is active */ + u8 no_page0; /* Nonzero if register page 0 is not controlled by Linux */ bool pr_support; /* RNG prediction resistance available */ int secvio_irq; /* Security violation interrupt number */ int virt_en; /* Virtualization enabled in CAAM */ From ac8aff0035fa58e53b39bd565ad6422a90ccdc87 Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Wed, 11 Jun 2025 11:38:08 +0000 Subject: [PATCH 042/116] crypto: caam - Support iMX8QXP and variants thereof The iMX8QXP (and variants such as the QX, DX, DXP) all identify as iMX8QXP. They have the exact same restrictions as the supported iMX8QM introduced at commit 61bb8db6f682 ("crypto: caam - Add support for i.MX8QM") Loosen the check a little bit with a wildcard to also match the iMX8QXP and its variants. Signed-off-by: John Ernberg Reviewed-by: Frank Li Reviewed-by: Peng Fan Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 766c447c9cfb..ce7b99019537 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -573,7 +573,7 @@ static const struct soc_device_attribute caam_imx_soc_table[] = { { .soc_id = "i.MX7*", .data = &caam_imx7_data }, { .soc_id = "i.MX8M*", .data = &caam_imx7_data }, { .soc_id = "i.MX8ULP", .data = &caam_imx8ulp_data }, - { .soc_id = "i.MX8QM", .data = &caam_imx8ulp_data }, + { .soc_id = "i.MX8Q*", .data = &caam_imx8ulp_data }, { .soc_id = "VF*", .data = &caam_vf610_data }, { .family = "Freescale i.MX" }, { /* sentinel */ } From 2df6ee328c548a804bd0ecac8b0ee254bf79db23 Mon Sep 17 00:00:00 2001 From: John Ernberg Date: Wed, 11 Jun 2025 11:38:09 +0000 Subject: [PATCH 043/116] dt-bindings: crypto: fsl,sec-v4.0: Add power domains for iMX8QM and iMX8QXP NXP SoCs like the iMX8QM, iMX8QXP or iMX8DXP use power domains for resource management. Add compatible strings for these SoCs (QXP and DXP gets to share as their only difference is a core-count, Q=Quad core and D=Dual core), and allow power-domains for them only. Keep the old restriction for others. Signed-off-by: John Ernberg Reviewed-by: Rob Herring (Arm) Signed-off-by: Herbert Xu --- .../bindings/crypto/fsl,sec-v4.0.yaml | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml index 75afa441e019..dcc755d2709a 100644 --- a/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml +++ b/Documentation/devicetree/bindings/crypto/fsl,sec-v4.0.yaml @@ -46,6 +46,8 @@ properties: - items: - enum: - fsl,imx6ul-caam + - fsl,imx8qm-caam + - fsl,imx8qxp-caam - fsl,sec-v5.0 - const: fsl,sec-v4.0 - const: fsl,sec-v4.0 @@ -77,6 +79,9 @@ properties: interrupts: maxItems: 1 + power-domains: + maxItems: 1 + fsl,sec-era: description: Defines the 'ERA' of the SEC device. 
$ref: /schemas/types.yaml#/definitions/uint32 @@ -106,7 +111,10 @@ patternProperties: - const: fsl,sec-v5.0-job-ring - const: fsl,sec-v4.0-job-ring - items: - - const: fsl,sec-v5.0-job-ring + - enum: + - fsl,imx8qm-job-ring + - fsl,imx8qxp-job-ring + - fsl,sec-v5.0-job-ring - const: fsl,sec-v4.0-job-ring - const: fsl,sec-v4.0-job-ring @@ -116,6 +124,9 @@ patternProperties: interrupts: maxItems: 1 + power-domains: + maxItems: 1 + fsl,liodn: description: Specifies the LIODN to be used in conjunction with the ppid-to-liodn @@ -125,6 +136,20 @@ patternProperties: $ref: /schemas/types.yaml#/definitions/uint32-array items: - maximum: 0xfff + allOf: + - if: + properties: + compatible: + contains: + enum: + - fsl,imx8qm-job-ring + - fsl,imx8qxp-job-ring + then: + required: + - power-domains + else: + properties: + power-domains: false '^rtic@[0-9a-f]+$': type: object @@ -212,6 +237,20 @@ required: - reg - ranges +if: + properties: + compatible: + contains: + enum: + - fsl,imx8qm-caam + - fsl,imx8qxp-caam +then: + required: + - power-domains +else: + properties: + power-domains: false + additionalProperties: false examples: From fe14fa50581752b41ae5e97887a97c957df56de6 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Wed, 11 Jun 2025 12:47:25 -0700 Subject: [PATCH 044/116] dt-bindings: crypto: add sama7d65 in Atmel AES Add DT bindings for the SAMA7D65 SoC in the Atmel AES binding. The SAMA7D65, similar to the SAM9x75 SoC, supports HMAC, dual buffer, and GCM, and likewise supports CBC, CFB, CTR, ECB, and XTS. Signed-off-by: Ryan Wanner Acked-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml index 7dc0748444fd..19010f90198a 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-aes.yaml @@ -15,7 +15,9 @@ properties: oneOf: - const: atmel,at91sam9g46-aes - items: - - const: microchip,sam9x7-aes + - enum: + - microchip,sam9x7-aes + - microchip,sama7d65-aes - const: atmel,at91sam9g46-aes reg: From 62a5462a24a32844c269eff50f17412982b118e9 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Wed, 11 Jun 2025 12:47:26 -0700 Subject: [PATCH 045/116] dt-bindings: crypto: add sama7d65 in Atmel SHA Add DT bindings for the SAMA7D65 SoC in the Atmel SHA binding. The SAMA7D65, similar to the SAM9x75 SoC, supports SHA1/224/256/384/512 and HMAC for the same hashes. Both also support automatic padding as well as double buffering.
Signed-off-by: Ryan Wanner Acked-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml index d378c53314dd..39e076b275b3 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-sha.yaml @@ -15,7 +15,9 @@ properties: oneOf: - const: atmel,at91sam9g46-sha - items: - - const: microchip,sam9x7-sha + - enum: + - microchip,sam9x7-sha + - microchip,sama7d65-sha - const: atmel,at91sam9g46-sha reg: From eafca096a368dda134ed0cda20fc98d314329169 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Wed, 11 Jun 2025 12:47:27 -0700 Subject: [PATCH 046/116] dt-bindings: crypto: add sama7d65 in Atmel TDES Add DT bindings for the SAMA7D65 SoC in the Atmel TDES binding. The SAMA7D65 SoC has the same capabilities as the SAM9x75 SoC. Signed-off-by: Ryan Wanner Acked-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml index 6a441f79efea..6f16008c4251 100644 --- a/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml +++ b/Documentation/devicetree/bindings/crypto/atmel,at91sam9g46-tdes.yaml @@ -15,7 +15,9 @@ properties: oneOf: - const: atmel,at91sam9g46-tdes - items: - - const: microchip,sam9x7-tdes + - enum: + - microchip,sam9x7-tdes + - microchip,sama7d65-tdes - const: atmel,at91sam9g46-tdes reg: From b32ab5f768f1c2412978b8eedecb284e4a8654b5 Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Wed, 11 Jun 2025 12:47:28 -0700 Subject: [PATCH 047/116] dt-bindings: rng: atmel,at91-trng: add sama7d65 TRNG Add a compatible for the Microchip SAMA7D65 SoC TRNG. Signed-off-by: Ryan Wanner Reviewed-by: Claudiu Beznea Acked-by: Rob Herring (Arm) Signed-off-by: Herbert Xu --- Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml b/Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml index b38f8252342e..f78614100ea8 100644 --- a/Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml +++ b/Documentation/devicetree/bindings/rng/atmel,at91-trng.yaml @@ -24,6 +24,7 @@ properties: - items: - enum: - microchip,sam9x7-trng + - microchip,sama7d65-trng - const: microchip,sam9x60-trng clocks: From 3c6e41aa617e988c52937ce241d0d7afc9691b3a Mon Sep 17 00:00:00 2001 From: Ryan Wanner Date: Wed, 11 Jun 2025 12:47:29 -0700 Subject: [PATCH 048/116] crypto: atmel - add support for AES and SHA IPs available on sama7d65 SoC This patch adds support for the hardware version of the AES and SHA IPs available on the SAMA7D65 SoC.
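Both drivers key their capabilities off the IP's major revision, so the new SoC needs only an extra case label. A condensed sketch of the pattern being extended, with field names taken from the hunks below:

    /* dd->hw_version holds the IP revision; masking with 0xff0 keeps
     * only the major version, giving one case per IP generation.
     */
    switch (dd->hw_version & 0xff0) {
    case 0x800: /* sama7d65 */
    case 0x700: /* sam9x7 */
        /* full feature set on recent revisions */
        break;
    default:
        /* conservative defaults for older or unknown IP */
        break;
    }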
Signed-off-by: Ryan Wanner Signed-off-by: Herbert Xu --- drivers/crypto/atmel-aes.c | 1 + drivers/crypto/atmel-sha.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 27c5d000b4b2..3a2684208dda 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -2297,6 +2297,7 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { + case 0x800: case 0x700: case 0x600: case 0x500: diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 2cc36da163e8..3d7573c7bd1c 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -2534,6 +2534,7 @@ static void atmel_sha_get_cap(struct atmel_sha_dev *dd) /* keep only major version number */ switch (dd->hw_version & 0xff0) { + case 0x800: case 0x700: case 0x600: case 0x510: From d0544657a28aade89e981bebdc116c628d750332 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 11 Jun 2025 18:10:45 -0500 Subject: [PATCH 049/116] dt-bindings: crypto: Convert ti,omap2-aes to DT schema Convert the TI OMAP AES binding to DT schema format. It's a straight forward conversion. Make "ti,hwmods" not required as it is deprecated and only used on OMAP2. Signed-off-by: Rob Herring (Arm) Reviewed-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/omap-aes.txt | 31 ---------- .../bindings/crypto/ti,omap2-aes.yaml | 58 +++++++++++++++++++ 2 files changed, 58 insertions(+), 31 deletions(-) delete mode 100644 Documentation/devicetree/bindings/crypto/omap-aes.txt create mode 100644 Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml diff --git a/Documentation/devicetree/bindings/crypto/omap-aes.txt b/Documentation/devicetree/bindings/crypto/omap-aes.txt deleted file mode 100644 index fd9717653cbb..000000000000 --- a/Documentation/devicetree/bindings/crypto/omap-aes.txt +++ /dev/null @@ -1,31 +0,0 @@ -OMAP SoC AES crypto Module - -Required properties: - -- compatible : Should contain entries for this and backward compatible - AES versions: - - "ti,omap2-aes" for OMAP2. - - "ti,omap3-aes" for OMAP3. - - "ti,omap4-aes" for OMAP4 and AM33XX. - Note that the OMAP2 and 3 versions are compatible (OMAP3 supports - more algorithms) but they are incompatible with OMAP4. -- ti,hwmods: Name of the hwmod associated with the AES module -- reg : Offset and length of the register set for the module -- interrupts : the interrupt-specifier for the AES module. - -Optional properties: -- dmas: DMA specifiers for tx and rx dma. See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt -- dma-names: DMA request names should include "tx" and "rx" if present. 
- -Example: - /* AM335x */ - aes: aes@53500000 { - compatible = "ti,omap4-aes"; - ti,hwmods = "aes"; - reg = <0x53500000 0xa0>; - interrupts = <102>; - dmas = <&edma 6>, - <&edma 5>; - dma-names = "tx", "rx"; - }; diff --git a/Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml b/Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml new file mode 100644 index 000000000000..90e92050ad2e --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/ti,omap2-aes.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ti,omap2-aes.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: OMAP SoC AES crypto Module + +maintainers: + - Aaro Koskinen + - Andreas Kemnade + - Kevin Hilman + - Roger Quadros + - Tony Lindgren + +properties: + compatible: + enum: + - ti,omap2-aes + - ti,omap3-aes + - ti,omap4-aes + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + dmas: + maxItems: 2 + + dma-names: + items: + - const: tx + - const: rx + + ti,hwmods: + description: Name of the hwmod associated with the AES module + const: aes + deprecated: true + +required: + - compatible + - reg + - interrupts + +additionalProperties: false + +examples: + - | + aes@53500000 { + compatible = "ti,omap4-aes"; + reg = <0x53500000 0xa0>; + interrupts = <102>; + dmas = <&edma 6>, + <&edma 5>; + dma-names = "tx", "rx"; + }; From 8c8dea2664fcfe861f10980059a116dfe61f536b Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 11 Jun 2025 18:10:56 -0500 Subject: [PATCH 050/116] dt-bindings: crypto: Convert ti,omap4-des to DT schema Convert the TI OMAP DES binding to DT schema format. Drop "ti,hwmods" as it is not actually used for this binding. Only OMAP2 platforms are using it. Signed-off-by: Rob Herring (Arm) Reviewed-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- .../devicetree/bindings/crypto/omap-des.txt | 30 --------- .../bindings/crypto/ti,omap4-des.yaml | 65 +++++++++++++++++++ 2 files changed, 65 insertions(+), 30 deletions(-) delete mode 100644 Documentation/devicetree/bindings/crypto/omap-des.txt create mode 100644 Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml diff --git a/Documentation/devicetree/bindings/crypto/omap-des.txt b/Documentation/devicetree/bindings/crypto/omap-des.txt deleted file mode 100644 index e8c63bf2e16d..000000000000 --- a/Documentation/devicetree/bindings/crypto/omap-des.txt +++ /dev/null @@ -1,30 +0,0 @@ -OMAP SoC DES crypto Module - -Required properties: - -- compatible : Should contain "ti,omap4-des" -- ti,hwmods: Name of the hwmod associated with the DES module -- reg : Offset and length of the register set for the module -- interrupts : the interrupt-specifier for the DES module -- clocks : A phandle to the functional clock node of the DES module - corresponding to each entry in clock-names -- clock-names : Name of the functional clock, should be "fck" - -Optional properties: -- dmas: DMA specifiers for tx and rx dma. 
See the DMA client binding, - Documentation/devicetree/bindings/dma/dma.txt - Each entry corresponds to an entry in dma-names -- dma-names: DMA request names should include "tx" and "rx" if present - -Example: - /* DRA7xx SoC */ - des: des@480a5000 { - compatible = "ti,omap4-des"; - ti,hwmods = "des"; - reg = <0x480a5000 0xa0>; - interrupts = ; - dmas = <&sdma 117>, <&sdma 116>; - dma-names = "tx", "rx"; - clocks = <&l3_iclk_div>; - clock-names = "fck"; - }; diff --git a/Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml b/Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml new file mode 100644 index 000000000000..f02f1e141218 --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/ti,omap4-des.yaml @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ti,omap4-des.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: OMAP4 DES crypto Module + +maintainers: + - Aaro Koskinen + - Andreas Kemnade + - Kevin Hilman + - Roger Quadros + - Tony Lindgren + +properties: + compatible: + const: ti,omap4-des + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + dmas: + maxItems: 2 + + dma-names: + items: + - const: tx + - const: rx + + clocks: + maxItems: 1 + + clock-names: + items: + - const: fck + +dependencies: + dmas: [ dma-names ] + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +additionalProperties: false + +examples: + - | + #include + + des@480a5000 { + compatible = "ti,omap4-des"; + reg = <0x480a5000 0xa0>; + interrupts = ; + clocks = <&l3_iclk_div>; + clock-names = "fck"; + dmas = <&sdma 117>, <&sdma 116>; + dma-names = "tx", "rx"; + }; From 1e2b7fcd3f075ff8c5b0e4474fe145d1c685f54f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 13 Jun 2025 16:51:38 +0800 Subject: [PATCH 051/116] crypto: ahash - Stop legacy tfms from using the set_virt fallback path Ensure that drivers that have not been converted to the ahash API do not use the ahash_request_set_virt fallback path as they cannot use the software fallback. 
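For context, the path being gated is the one taken when a caller hands the ahash API virtual addresses instead of scatterlists. A hedged caller-side sketch, with the signatures assumed from this series and error handling elided:

    /* req was allocated with ahash_request_alloc() for the tfm */
    ahash_request_set_callback(req, 0, NULL, NULL);
    ahash_request_set_virt(req, src_buf, digest, len); /* virtual addresses */
    err = crypto_ahash_digest(req);                    /* may use SW fallback */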
Reported-by: Eric Biggers Fixes: 9d7a0ab1c753 ("crypto: ahash - Handle partial blocks in API") Signed-off-by: Herbert Xu --- crypto/ahash.c | 3 +++ include/crypto/internal/hash.h | 6 ++++++ 2 files changed, 9 insertions(+) diff --git a/crypto/ahash.c b/crypto/ahash.c index bd9e49950201..7a51e0cf9322 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -350,6 +350,9 @@ static int ahash_do_req_chain(struct ahash_request *req, if (!crypto_ahash_need_fallback(tfm)) return -ENOSYS; + if (crypto_hash_no_export_core(tfm)) + return -ENOSYS; + { u8 state[HASH_MAX_STATESIZE]; diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 0f85c543f80b..f052afa6e7b0 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -91,6 +91,12 @@ static inline bool crypto_hash_alg_needs_key(struct hash_alg_common *alg) !(alg->base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY); } +static inline bool crypto_hash_no_export_core(struct crypto_ahash *tfm) +{ + return crypto_hash_alg_common(tfm)->base.cra_flags & + CRYPTO_AHASH_ALG_NO_EXPORT_CORE; +} + int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask); From 8024774190a5ef2af2c5846f60a50b23e0980a32 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Fri, 13 Jun 2025 11:32:27 +0100 Subject: [PATCH 052/116] crypto: qat - lower priority for skcipher and aead algorithms Most kernel applications utilizing the crypto API operate synchronously and on small buffer sizes, therefore do not benefit from QAT acceleration. Reduce the priority of QAT implementations for both skcipher and aead algorithms, allowing more suitable alternatives to be selected by default. Signed-off-by: Giovanni Cabiddu Link: https://lore.kernel.org/all/20250613012357.GA3603104@google.com/ Cc: stable@vger.kernel.org Acked-by: Eric Biggers Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/qat_algs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/qat_algs.c b/drivers/crypto/intel/qat/qat_common/qat_algs.c index 3c4bba4a8779..d69cc1e5e023 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_algs.c +++ b/drivers/crypto/intel/qat/qat_common/qat_algs.c @@ -1277,7 +1277,7 @@ static struct aead_alg qat_aeads[] = { { .base = { .cra_name = "authenc(hmac(sha1),cbc(aes))", .cra_driver_name = "qat_aes_cbc_hmac_sha1", - .cra_priority = 4001, + .cra_priority = 100, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct qat_alg_aead_ctx), @@ -1294,7 +1294,7 @@ static struct aead_alg qat_aeads[] = { { .base = { .cra_name = "authenc(hmac(sha256),cbc(aes))", .cra_driver_name = "qat_aes_cbc_hmac_sha256", - .cra_priority = 4001, + .cra_priority = 100, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct qat_alg_aead_ctx), @@ -1311,7 +1311,7 @@ static struct aead_alg qat_aeads[] = { { .base = { .cra_name = "authenc(hmac(sha512),cbc(aes))", .cra_driver_name = "qat_aes_cbc_hmac_sha512", - .cra_priority = 4001, + .cra_priority = 100, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct qat_alg_aead_ctx), @@ -1329,7 +1329,7 @@ static struct aead_alg qat_aeads[] = { { static struct skcipher_alg qat_skciphers[] = { { .base.cra_name = "cbc(aes)", .base.cra_driver_name = "qat_aes_cbc", - .base.cra_priority = 4001, + .base.cra_priority 
= 100, .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx), @@ -1347,7 +1347,7 @@ static struct skcipher_alg qat_skciphers[] = { { }, { .base.cra_name = "ctr(aes)", .base.cra_driver_name = "qat_aes_ctr", - .base.cra_priority = 4001, + .base.cra_priority = 100, .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct qat_alg_skcipher_ctx), @@ -1365,7 +1365,7 @@ static struct skcipher_alg qat_skciphers[] = { { }, { .base.cra_name = "xts(aes)", .base.cra_driver_name = "qat_aes_xts", - .base.cra_priority = 4001, + .base.cra_priority = 100, .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ALLOCATES_MEMORY, .base.cra_blocksize = AES_BLOCK_SIZE, From 1adaaeeb90c341a7e577b11d57354b22ea307003 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 14 Jun 2025 01:05:05 +0100 Subject: [PATCH 053/116] crypto: virtio - Remove unused virtcrypto functions virtcrypto_devmgr_get_first() and virtcrypto_dev_in_use() were added in 2016 by commit dbaf0624ffa5 ("crypto: add virtio-crypto driver") but have remained unused. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Zenghui Yu Signed-off-by: Herbert Xu --- drivers/crypto/virtio/virtio_crypto_common.h | 2 -- drivers/crypto/virtio/virtio_crypto_mgr.c | 36 -------------------- 2 files changed, 38 deletions(-) diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 7059bbe5a2eb..19c934af3df6 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -113,8 +113,6 @@ struct virtio_crypto_request { int virtcrypto_devmgr_add_dev(struct virtio_crypto *vcrypto_dev); struct list_head *virtcrypto_devmgr_get_head(void); void virtcrypto_devmgr_rm_dev(struct virtio_crypto *vcrypto_dev); -struct virtio_crypto *virtcrypto_devmgr_get_first(void); -int virtcrypto_dev_in_use(struct virtio_crypto *vcrypto_dev); int virtcrypto_dev_get(struct virtio_crypto *vcrypto_dev); void virtcrypto_dev_put(struct virtio_crypto *vcrypto_dev); int virtcrypto_dev_started(struct virtio_crypto *vcrypto_dev); diff --git a/drivers/crypto/virtio/virtio_crypto_mgr.c b/drivers/crypto/virtio/virtio_crypto_mgr.c index bddbd8ebfebe..06c74fa132cd 100644 --- a/drivers/crypto/virtio/virtio_crypto_mgr.c +++ b/drivers/crypto/virtio/virtio_crypto_mgr.c @@ -81,42 +81,6 @@ void virtcrypto_devmgr_rm_dev(struct virtio_crypto *vcrypto_dev) mutex_unlock(&table_lock); } -/* - * virtcrypto_devmgr_get_first() - * - * Function returns the first virtio crypto device from the acceleration - * framework. - * - * To be used by virtio crypto device specific drivers. - * - * Return: pointer to vcrypto_dev or NULL if not found. - */ -struct virtio_crypto *virtcrypto_devmgr_get_first(void) -{ - struct virtio_crypto *dev = NULL; - - mutex_lock(&table_lock); - if (!list_empty(&virtio_crypto_table)) - dev = list_first_entry(&virtio_crypto_table, - struct virtio_crypto, - list); - mutex_unlock(&table_lock); - return dev; -} - -/* - * virtcrypto_dev_in_use() - Check whether vcrypto_dev is currently in use - * @vcrypto_dev: Pointer to virtio crypto device. - * - * To be used by virtio crypto device specific drivers. - * - * Return: 1 when device is in use, 0 otherwise. 
- */ -int virtcrypto_dev_in_use(struct virtio_crypto *vcrypto_dev) -{ - return atomic_read(&vcrypto_dev->ref_count) != 0; -} - /* * virtcrypto_dev_get() - Increment vcrypto_dev reference count * @vcrypto_dev: Pointer to virtio crypto device. From f5ad93ffb54119a8dc5e18f070624d4ead586969 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Mon, 16 Jun 2025 04:19:44 +0100 Subject: [PATCH 054/116] crypto: zstd - convert to acomp Convert the implementation to a native acomp interface using zstd streaming APIs, eliminating the need for buffer linearization. This includes: - Removal of the scomp interface in favor of acomp - Refactoring of stream allocation, initialization, and handling for both compression and decompression using Zstandard streaming APIs - Replacement of crypto_register_scomp() with crypto_register_acomp() for module registration Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- crypto/zstd.c | 386 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 249 insertions(+), 137 deletions(-) diff --git a/crypto/zstd.c b/crypto/zstd.c index 7570e11b4ee6..657e0cf7b952 100644 --- a/crypto/zstd.c +++ b/crypto/zstd.c @@ -12,188 +12,300 @@ #include #include #include -#include +#include +#include -#define ZSTD_DEF_LEVEL 3 +#define ZSTD_DEF_LEVEL 3 +#define ZSTD_MAX_WINDOWLOG 18 +#define ZSTD_MAX_SIZE BIT(ZSTD_MAX_WINDOWLOG) struct zstd_ctx { zstd_cctx *cctx; zstd_dctx *dctx; - void *cwksp; - void *dwksp; + size_t wksp_size; + zstd_parameters params; + u8 wksp[0] __aligned(8); }; -static zstd_parameters zstd_params(void) +static DEFINE_MUTEX(zstd_stream_lock); + +static void *zstd_alloc_stream(void) { - return zstd_get_params(ZSTD_DEF_LEVEL, 0); -} - -static int zstd_comp_init(struct zstd_ctx *ctx) -{ - int ret = 0; - const zstd_parameters params = zstd_params(); - const size_t wksp_size = zstd_cctx_workspace_bound(¶ms.cParams); - - ctx->cwksp = vzalloc(wksp_size); - if (!ctx->cwksp) { - ret = -ENOMEM; - goto out; - } - - ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size); - if (!ctx->cctx) { - ret = -EINVAL; - goto out_free; - } -out: - return ret; -out_free: - vfree(ctx->cwksp); - goto out; -} - -static int zstd_decomp_init(struct zstd_ctx *ctx) -{ - int ret = 0; - const size_t wksp_size = zstd_dctx_workspace_bound(); - - ctx->dwksp = vzalloc(wksp_size); - if (!ctx->dwksp) { - ret = -ENOMEM; - goto out; - } - - ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size); - if (!ctx->dctx) { - ret = -EINVAL; - goto out_free; - } -out: - return ret; -out_free: - vfree(ctx->dwksp); - goto out; -} - -static void zstd_comp_exit(struct zstd_ctx *ctx) -{ - vfree(ctx->cwksp); - ctx->cwksp = NULL; - ctx->cctx = NULL; -} - -static void zstd_decomp_exit(struct zstd_ctx *ctx) -{ - vfree(ctx->dwksp); - ctx->dwksp = NULL; - ctx->dctx = NULL; -} - -static int __zstd_init(void *ctx) -{ - int ret; - - ret = zstd_comp_init(ctx); - if (ret) - return ret; - ret = zstd_decomp_init(ctx); - if (ret) - zstd_comp_exit(ctx); - return ret; -} - -static void *zstd_alloc_ctx(void) -{ - int ret; + zstd_parameters params; struct zstd_ctx *ctx; + size_t wksp_size; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + params = zstd_get_params(ZSTD_DEF_LEVEL, ZSTD_MAX_SIZE); + + wksp_size = max_t(size_t, + zstd_cstream_workspace_bound(¶ms.cParams), + zstd_dstream_workspace_bound(ZSTD_MAX_SIZE)); + if (!wksp_size) + return ERR_PTR(-EINVAL); + + ctx = kvmalloc(sizeof(*ctx) + wksp_size, GFP_KERNEL); if (!ctx) return ERR_PTR(-ENOMEM); - ret = __zstd_init(ctx); - if (ret) { - 
kfree(ctx); - return ERR_PTR(ret); - } + ctx->params = params; + ctx->wksp_size = wksp_size; return ctx; } -static void __zstd_exit(void *ctx) +static struct crypto_acomp_streams zstd_streams = { + .alloc_ctx = zstd_alloc_stream, + .cfree_ctx = kvfree, +}; + +static int zstd_init(struct crypto_acomp *acomp_tfm) { - zstd_comp_exit(ctx); - zstd_decomp_exit(ctx); + int ret = 0; + + mutex_lock(&zstd_stream_lock); + ret = crypto_acomp_alloc_streams(&zstd_streams); + mutex_unlock(&zstd_stream_lock); + + return ret; } -static void zstd_free_ctx(void *ctx) +static void zstd_exit(struct crypto_acomp *acomp_tfm) { - __zstd_exit(ctx); - kfree_sensitive(ctx); + crypto_acomp_free_streams(&zstd_streams); } -static int __zstd_compress(const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen, void *ctx) +static int zstd_compress_one(struct acomp_req *req, struct zstd_ctx *ctx, + const void *src, void *dst, unsigned int *dlen) { - size_t out_len; - struct zstd_ctx *zctx = ctx; - const zstd_parameters params = zstd_params(); + unsigned int out_len; - out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, ¶ms); + ctx->cctx = zstd_init_cctx(ctx->wksp, ctx->wksp_size); + if (!ctx->cctx) + return -EINVAL; + + out_len = zstd_compress_cctx(ctx->cctx, dst, req->dlen, src, req->slen, + &ctx->params); if (zstd_is_error(out_len)) return -EINVAL; + *dlen = out_len; + return 0; } -static int zstd_scompress(struct crypto_scomp *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen, - void *ctx) +static int zstd_compress(struct acomp_req *req) { - return __zstd_compress(src, slen, dst, dlen, ctx); -} + struct crypto_acomp_stream *s; + unsigned int pos, scur, dcur; + unsigned int total_out = 0; + bool data_available = true; + zstd_out_buffer outbuf; + struct acomp_walk walk; + zstd_in_buffer inbuf; + struct zstd_ctx *ctx; + size_t pending_bytes; + size_t num_bytes; + int ret; -static int __zstd_decompress(const u8 *src, unsigned int slen, - u8 *dst, unsigned int *dlen, void *ctx) -{ - size_t out_len; - struct zstd_ctx *zctx = ctx; + s = crypto_acomp_lock_stream_bh(&zstd_streams); + ctx = s->ctx; - out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen); - if (zstd_is_error(out_len)) - return -EINVAL; - *dlen = out_len; - return 0; -} + ret = acomp_walk_virt(&walk, req, true); + if (ret) + goto out; -static int zstd_sdecompress(struct crypto_scomp *tfm, const u8 *src, - unsigned int slen, u8 *dst, unsigned int *dlen, - void *ctx) -{ - return __zstd_decompress(src, slen, dst, dlen, ctx); -} - -static struct scomp_alg scomp = { - .alloc_ctx = zstd_alloc_ctx, - .free_ctx = zstd_free_ctx, - .compress = zstd_scompress, - .decompress = zstd_sdecompress, - .base = { - .cra_name = "zstd", - .cra_driver_name = "zstd-scomp", - .cra_module = THIS_MODULE, + ctx->cctx = zstd_init_cstream(&ctx->params, 0, ctx->wksp, ctx->wksp_size); + if (!ctx->cctx) { + ret = -EINVAL; + goto out; } + + do { + dcur = acomp_walk_next_dst(&walk); + if (!dcur) { + ret = -ENOSPC; + goto out; + } + + outbuf.pos = 0; + outbuf.dst = (u8 *)walk.dst.virt.addr; + outbuf.size = dcur; + + do { + scur = acomp_walk_next_src(&walk); + if (dcur == req->dlen && scur == req->slen) { + ret = zstd_compress_one(req, ctx, walk.src.virt.addr, + walk.dst.virt.addr, &total_out); + acomp_walk_done_src(&walk, scur); + acomp_walk_done_dst(&walk, dcur); + goto out; + } + + if (scur) { + inbuf.pos = 0; + inbuf.src = walk.src.virt.addr; + inbuf.size = scur; + } else { + data_available = false; + break; + } + + num_bytes = 
zstd_compress_stream(ctx->cctx, &outbuf, &inbuf); + if (ZSTD_isError(num_bytes)) { + ret = -EIO; + goto out; + } + + pending_bytes = zstd_flush_stream(ctx->cctx, &outbuf); + if (ZSTD_isError(pending_bytes)) { + ret = -EIO; + goto out; + } + acomp_walk_done_src(&walk, inbuf.pos); + } while (dcur != outbuf.pos); + + total_out += outbuf.pos; + acomp_walk_done_dst(&walk, dcur); + } while (data_available); + + pos = outbuf.pos; + num_bytes = zstd_end_stream(ctx->cctx, &outbuf); + if (ZSTD_isError(num_bytes)) + ret = -EIO; + else + total_out += (outbuf.pos - pos); + +out: + if (ret) + req->dlen = 0; + else + req->dlen = total_out; + + crypto_acomp_unlock_stream_bh(s); + + return ret; +} + +static int zstd_decompress_one(struct acomp_req *req, struct zstd_ctx *ctx, + const void *src, void *dst, unsigned int *dlen) +{ + size_t out_len; + + ctx->dctx = zstd_init_dctx(ctx->wksp, ctx->wksp_size); + if (!ctx->dctx) + return -EINVAL; + + out_len = zstd_decompress_dctx(ctx->dctx, dst, req->dlen, src, req->slen); + if (zstd_is_error(out_len)) + return -EINVAL; + + *dlen = out_len; + + return 0; +} + +static int zstd_decompress(struct acomp_req *req) +{ + struct crypto_acomp_stream *s; + unsigned int total_out = 0; + unsigned int scur, dcur; + zstd_out_buffer outbuf; + struct acomp_walk walk; + zstd_in_buffer inbuf; + struct zstd_ctx *ctx; + size_t pending_bytes; + int ret; + + s = crypto_acomp_lock_stream_bh(&zstd_streams); + ctx = s->ctx; + + ret = acomp_walk_virt(&walk, req, true); + if (ret) + goto out; + + ctx->dctx = zstd_init_dstream(ZSTD_MAX_SIZE, ctx->wksp, ctx->wksp_size); + if (!ctx->dctx) { + ret = -EINVAL; + goto out; + } + + do { + scur = acomp_walk_next_src(&walk); + if (scur) { + inbuf.pos = 0; + inbuf.size = scur; + inbuf.src = walk.src.virt.addr; + } else { + break; + } + + do { + dcur = acomp_walk_next_dst(&walk); + if (dcur == req->dlen && scur == req->slen) { + ret = zstd_decompress_one(req, ctx, walk.src.virt.addr, + walk.dst.virt.addr, &total_out); + acomp_walk_done_dst(&walk, dcur); + acomp_walk_done_src(&walk, scur); + goto out; + } + + if (!dcur) { + ret = -ENOSPC; + goto out; + } + + outbuf.pos = 0; + outbuf.dst = (u8 *)walk.dst.virt.addr; + outbuf.size = dcur; + + pending_bytes = zstd_decompress_stream(ctx->dctx, &outbuf, &inbuf); + if (ZSTD_isError(pending_bytes)) { + ret = -EIO; + goto out; + } + + total_out += outbuf.pos; + + acomp_walk_done_dst(&walk, outbuf.pos); + } while (scur != inbuf.pos); + + if (scur) + acomp_walk_done_src(&walk, scur); + } while (ret == 0); + +out: + if (ret) + req->dlen = 0; + else + req->dlen = total_out; + + crypto_acomp_unlock_stream_bh(s); + + return ret; +} + +static struct acomp_alg zstd_acomp = { + .base = { + .cra_name = "zstd", + .cra_driver_name = "zstd-generic", + .cra_flags = CRYPTO_ALG_REQ_VIRT, + .cra_module = THIS_MODULE, + }, + .init = zstd_init, + .exit = zstd_exit, + .compress = zstd_compress, + .decompress = zstd_decompress, }; static int __init zstd_mod_init(void) { - return crypto_register_scomp(&scomp); + return crypto_register_acomp(&zstd_acomp); } static void __exit zstd_mod_fini(void) { - crypto_unregister_scomp(&scomp); + crypto_unregister_acomp(&zstd_acomp); } module_init(zstd_mod_init); From ab8b9fd39c45b7760093528cbef93e7353359d82 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 16 Jun 2025 21:50:27 +0000 Subject: [PATCH 055/116] crypto: ccp - Fix SNP panic notifier unregistration Panic notifiers are invoked with RCU read lock held and when the SNP panic notifier tries to unregister itself from the panic notifier 
callback, it causes a deadlock because notifier unregistration does RCU
synchronization.

Code flow for SNP panic notifier:

snp_shutdown_on_panic() ->
  __sev_firmware_shutdown() ->
    __sev_snp_shutdown_locked() ->
      atomic_notifier_chain_unregister(.., &snp_panic_notifier)

Fix SNP panic notifier to unregister itself during SNP shutdown only if
panic is not in progress.

Reviewed-by: Tom Lendacky
Cc: stable@vger.kernel.org
Fixes: 19860c3274fb ("crypto: ccp - Register SNP panic notifier only if SNP is enabled")
Signed-off-by: Ashish Kalra
Signed-off-by: Herbert Xu
---
 drivers/crypto/ccp/sev-dev.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 8fb94c5f006a..17edc6bf5622 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -1787,8 +1787,14 @@ static int __sev_snp_shutdown_locked(int *error, bool panic)
 	sev->snp_initialized = false;
 	dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n");
 
-	atomic_notifier_chain_unregister(&panic_notifier_list,
-					 &snp_panic_notifier);
+	/*
+	 * __sev_snp_shutdown_locked() deadlocks when it tries to unregister
+	 * itself during panic as the panic notifier is called with RCU read
+	 * lock held and notifier unregistration does RCU synchronization.
+	 */
+	if (!panic)
+		atomic_notifier_chain_unregister(&panic_notifier_list,
+						 &snp_panic_notifier);
 
 	/* Reset TMR size back to default */
 	sev_es_tmr_size = SEV_TMR_SIZE;

From 7ae637a269cdc41cdebf86341a5cf3f2281e805c Mon Sep 17 00:00:00 2001
From: Suman Kumar Chakraborty
Date: Tue, 17 Jun 2025 11:52:32 +0100
Subject: [PATCH 056/116] crypto: qat - remove duplicate masking for GEN6
 devices

The ICP_ACCEL_CAPABILITIES_CIPHER capability is masked out redundantly
for QAT GEN6 devices. Remove it to avoid code duplication.

This does not introduce any functional change.

Signed-off-by: Suman Kumar Chakraborty
Reviewed-by: Giovanni Cabiddu
Signed-off-by: Herbert Xu
---
 drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
index 48a29a102dd0..cc8554c65d0b 100644
--- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
@@ -629,7 +629,6 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
 		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CHACHA_POLY;
 		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AESGCM_SPC;
 		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AES_V2;
-		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
 	}
 	if (fusectl1 & ICP_ACCEL_GEN6_MASK_AUTH_SLICE) {
 		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION;

From 4e55a929ff4d973206879a997a47a188353b3cd6 Mon Sep 17 00:00:00 2001
From: Suman Kumar Chakraborty
Date: Tue, 17 Jun 2025 12:27:12 +0100
Subject: [PATCH 057/116] crypto: qat - restore ASYM service support for GEN6
 devices

Support for asymmetric crypto services was not included in the qat_6xxx
driver: the asymmetric capabilities were explicitly set to 0 to allow
for additional testing.

Enable asymmetric crypto services on QAT GEN6 devices by setting the
appropriate capability flags.
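The change follows the pattern already used for the symmetric
capabilities: advertise the services by default, then strip them when
the corresponding slice is fused off. A condensed sketch of that
pattern (identifiers taken from the diff below, not new code):

	/* advertise asym services unless the PKE slice is disabled by fuse */
	capabilities_asym = ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
	if (fusectl1 & ICP_ACCEL_GEN6_MASK_PKE_SLICE)
		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;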
Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver")
Signed-off-by: Suman Kumar Chakraborty
Reviewed-by: Giovanni Cabiddu
Signed-off-by: Herbert Xu
---
 drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
index cc8554c65d0b..435d2ff38ab3 100644
--- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
+++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c
@@ -637,7 +637,15 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev)
 		capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER;
 	}
 
-	capabilities_asym = 0;
+	capabilities_asym = ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC |
+			    ICP_ACCEL_CAPABILITIES_SM2 |
+			    ICP_ACCEL_CAPABILITIES_ECEDMONT;
+
+	if (fusectl1 & ICP_ACCEL_GEN6_MASK_PKE_SLICE) {
+		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC;
+		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_SM2;
+		capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_ECEDMONT;
+	}
 
 	capabilities_dc = ICP_ACCEL_CAPABILITIES_COMPRESSION |
			  ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION |

From e109b8ee1a3d5665f2f60612a60ad6c95339f5d3 Mon Sep 17 00:00:00 2001
From: Jeff Barnes
Date: Tue, 17 Jun 2025 15:30:05 -0400
Subject: [PATCH 058/116] crypto: testmgr - Restore sha384 and hmac_sha384
 drbgs in FIPS mode

Set .fips_allowed in the following drbg alg_test_desc structs.

drbg_nopr_hmac_sha384
drbg_nopr_sha384
drbg_pr_hmac_sha384
drbg_pr_sha384

The sha384 and hmac_sha384 DRBGs with and without prediction resistance
were disallowed in an early version of the FIPS 140-3 Implementation
Guidance document. Hence, the fips_allowed flag in struct alg_test_desc
pertaining to the affected DRBGs was unset. The IG has been withdrawn
and they are allowed again.

Furthermore, when the DRBGs are configured, /proc/crypto shows that
drbg_*pr_sha384 and drbg_*pr_hmac_sha384 are fips-approved ("fips: yes")
but because their self-tests are not run (a consequence of unsetting the
fips_allowed flag), the drbgs won't load successfully, despite the
seemingly contradictory "fips: yes" in /proc/crypto.

This series contains a single patch that sets the fips_allowed flag in
the sha384-impacted DRBGs, which restores the ability to load them in
FIPS mode.

Link: https://lore.kernel.org/linux-crypto/979f4f6f-bb74-4b93-8cbf-6ed653604f0e@jvdsn.com/
Link: https://csrc.nist.gov/CSRC/media/Projects/cryptographic-module-validation-program/documents/fips%20140-3/FIPS%20140-3%20IG.pdf
To: Herbert Xu
To: David S.
Miller Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Jeff Barnes Signed-off-by: Herbert Xu --- crypto/testmgr.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index a4ad939e03c9..196509c44d47 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4735,6 +4735,7 @@ static const struct alg_test_desc alg_test_descs[] = { */ .alg = "drbg_nopr_hmac_sha384", .test = alg_test_null, + .fips_allowed = 1 }, { .alg = "drbg_nopr_hmac_sha512", .test = alg_test_drbg, @@ -4753,6 +4754,7 @@ static const struct alg_test_desc alg_test_descs[] = { /* covered by drbg_nopr_sha256 test */ .alg = "drbg_nopr_sha384", .test = alg_test_null, + .fips_allowed = 1 }, { .alg = "drbg_nopr_sha512", .fips_allowed = 1, @@ -4784,6 +4786,7 @@ static const struct alg_test_desc alg_test_descs[] = { /* covered by drbg_pr_hmac_sha256 test */ .alg = "drbg_pr_hmac_sha384", .test = alg_test_null, + .fips_allowed = 1 }, { .alg = "drbg_pr_hmac_sha512", .test = alg_test_null, @@ -4799,6 +4802,7 @@ static const struct alg_test_desc alg_test_descs[] = { /* covered by drbg_pr_sha256 test */ .alg = "drbg_pr_sha384", .test = alg_test_null, + .fips_allowed = 1 }, { .alg = "drbg_pr_sha512", .fips_allowed = 1, From c71187c17f0b9fa7a567f09fc079369ae3970e85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 18 Jun 2025 09:10:18 +0200 Subject: [PATCH 059/116] crypto: ccree - Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the past %pK was preferable to %p as it would not leak raw pointer values into the kernel log. Since commit ad67b74d2469 ("printk: hash addresses printed with %p") the regular %p has been improved to avoid this issue. Furthermore, restricted pointers ("%pK") were never meant to be used through printk(). They can still unintentionally leak raw pointers or acquire sleeping locks in atomic contexts. Switch to the regular pointer formatting which is safer and easier to reason about. 
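The substitution is purely mechanical; for illustration (a sketch, not
part of the diff below):

	/* before: restricted pointer, unsuitable for printk() */
	dev_dbg(dev, "req=%pK\n", req);

	/* after: plain %p, hashed by printk since commit ad67b74d2469 */
	dev_dbg(dev, "req=%p\n", req);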
Signed-off-by: Thomas Weißschuh Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_buffer_mgr.c | 54 ++++++++++++++-------------- drivers/crypto/ccree/cc_cipher.c | 4 +-- drivers/crypto/ccree/cc_hash.c | 30 ++++++++-------- 3 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index bcca55bff910..3963bb91321f 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -224,7 +224,7 @@ static int cc_generate_mlli(struct device *dev, struct buffer_array *sg_data, /* Set MLLI size for the bypass operation */ mlli_params->mlli_len = (total_nents * LLI_ENTRY_BYTE_SIZE); - dev_dbg(dev, "MLLI params: virt_addr=%pK dma_addr=%pad mlli_len=0x%X\n", + dev_dbg(dev, "MLLI params: virt_addr=%p dma_addr=%pad mlli_len=0x%X\n", mlli_params->mlli_virt_addr, &mlli_params->mlli_dma_addr, mlli_params->mlli_len); @@ -239,7 +239,7 @@ static void cc_add_sg_entry(struct device *dev, struct buffer_array *sgl_data, { unsigned int index = sgl_data->num_of_buffers; - dev_dbg(dev, "index=%u nents=%u sgl=%pK data_len=0x%08X is_last=%d\n", + dev_dbg(dev, "index=%u nents=%u sgl=%p data_len=0x%08X is_last=%d\n", index, nents, sgl, data_len, is_last_table); sgl_data->nents[index] = nents; sgl_data->entry[index].sgl = sgl; @@ -298,7 +298,7 @@ cc_set_aead_conf_buf(struct device *dev, struct aead_req_ctx *areq_ctx, dev_err(dev, "dma_map_sg() config buffer failed\n"); return -ENOMEM; } - dev_dbg(dev, "Mapped curr_buff: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n", + dev_dbg(dev, "Mapped curr_buff: dma_address=%pad page=%p addr=%p offset=%u length=%u\n", &sg_dma_address(&areq_ctx->ccm_adata_sg), sg_page(&areq_ctx->ccm_adata_sg), sg_virt(&areq_ctx->ccm_adata_sg), @@ -323,7 +323,7 @@ static int cc_set_hash_buf(struct device *dev, struct ahash_req_ctx *areq_ctx, dev_err(dev, "dma_map_sg() src buffer failed\n"); return -ENOMEM; } - dev_dbg(dev, "Mapped curr_buff: dma_address=%pad page=%p addr=%pK offset=%u length=%u\n", + dev_dbg(dev, "Mapped curr_buff: dma_address=%pad page=%p addr=%p offset=%u length=%u\n", &sg_dma_address(areq_ctx->buff_sg), sg_page(areq_ctx->buff_sg), sg_virt(areq_ctx->buff_sg), areq_ctx->buff_sg->offset, areq_ctx->buff_sg->length); @@ -359,11 +359,11 @@ void cc_unmap_cipher_request(struct device *dev, void *ctx, if (src != dst) { dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_TO_DEVICE); dma_unmap_sg(dev, dst, req_ctx->out_nents, DMA_FROM_DEVICE); - dev_dbg(dev, "Unmapped req->dst=%pK\n", sg_virt(dst)); - dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); + dev_dbg(dev, "Unmapped req->dst=%p\n", sg_virt(dst)); + dev_dbg(dev, "Unmapped req->src=%p\n", sg_virt(src)); } else { dma_unmap_sg(dev, src, req_ctx->in_nents, DMA_BIDIRECTIONAL); - dev_dbg(dev, "Unmapped req->src=%pK\n", sg_virt(src)); + dev_dbg(dev, "Unmapped req->src=%p\n", sg_virt(src)); } } @@ -391,11 +391,11 @@ int cc_map_cipher_request(struct cc_drvdata *drvdata, void *ctx, req_ctx->gen_ctx.iv_dma_addr = dma_map_single(dev, info, ivsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, req_ctx->gen_ctx.iv_dma_addr)) { - dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping iv %u B at va=%p for DMA failed\n", ivsize, info); return -ENOMEM; } - dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped iv %u B at va=%p to dma=%pad\n", ivsize, info, &req_ctx->gen_ctx.iv_dma_addr); } else { req_ctx->gen_ctx.iv_dma_addr = 0; @@ -506,7 +506,7 @@ void 
cc_unmap_aead_request(struct device *dev, struct aead_request *req) if ((areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI || areq_ctx->data_buff_type == CC_DMA_BUF_MLLI) && (areq_ctx->mlli_params.mlli_virt_addr)) { - dev_dbg(dev, "free MLLI buffer: dma=%pad virt=%pK\n", + dev_dbg(dev, "free MLLI buffer: dma=%pad virt=%p\n", &areq_ctx->mlli_params.mlli_dma_addr, areq_ctx->mlli_params.mlli_virt_addr); dma_pool_free(areq_ctx->mlli_params.curr_pool, @@ -514,13 +514,13 @@ void cc_unmap_aead_request(struct device *dev, struct aead_request *req) areq_ctx->mlli_params.mlli_dma_addr); } - dev_dbg(dev, "Unmapping src sgl: req->src=%pK areq_ctx->src.nents=%u areq_ctx->assoc.nents=%u assoclen:%u cryptlen=%u\n", + dev_dbg(dev, "Unmapping src sgl: req->src=%p areq_ctx->src.nents=%u areq_ctx->assoc.nents=%u assoclen:%u cryptlen=%u\n", sg_virt(req->src), areq_ctx->src.nents, areq_ctx->assoc.nents, areq_ctx->assoclen, req->cryptlen); dma_unmap_sg(dev, req->src, areq_ctx->src.mapped_nents, src_direction); if (req->src != req->dst) { - dev_dbg(dev, "Unmapping dst sgl: req->dst=%pK\n", + dev_dbg(dev, "Unmapping dst sgl: req->dst=%p\n", sg_virt(req->dst)); dma_unmap_sg(dev, req->dst, areq_ctx->dst.mapped_nents, DMA_FROM_DEVICE); } @@ -566,7 +566,7 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, dma_map_single(dev, areq_ctx->gen_ctx.iv, hw_iv_size, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, areq_ctx->gen_ctx.iv_dma_addr)) { - dev_err(dev, "Mapping iv %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping iv %u B at va=%p for DMA failed\n", hw_iv_size, req->iv); kfree_sensitive(areq_ctx->gen_ctx.iv); areq_ctx->gen_ctx.iv = NULL; @@ -574,7 +574,7 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, goto chain_iv_exit; } - dev_dbg(dev, "Mapped iv %u B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped iv %u B at va=%p to dma=%pad\n", hw_iv_size, req->iv, &areq_ctx->gen_ctx.iv_dma_addr); chain_iv_exit: @@ -977,7 +977,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) dma_addr = dma_map_single(dev, areq_ctx->mac_buf, MAX_MAC_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "Mapping mac_buf %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping mac_buf %u B at va=%p for DMA failed\n", MAX_MAC_SIZE, areq_ctx->mac_buf); rc = -ENOMEM; goto aead_map_failure; @@ -991,7 +991,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "Mapping mac_buf %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping mac_buf %u B at va=%p for DMA failed\n", AES_BLOCK_SIZE, addr); areq_ctx->ccm_iv0_dma_addr = 0; rc = -ENOMEM; @@ -1009,7 +1009,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) dma_addr = dma_map_single(dev, areq_ctx->hkey, AES_BLOCK_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "Mapping hkey %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping hkey %u B at va=%p for DMA failed\n", AES_BLOCK_SIZE, areq_ctx->hkey); rc = -ENOMEM; goto aead_map_failure; @@ -1019,7 +1019,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) dma_addr = dma_map_single(dev, &areq_ctx->gcm_len_block, AES_BLOCK_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "Mapping gcm_len_block %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping gcm_len_block %u B at va=%p for DMA failed\n", AES_BLOCK_SIZE, &areq_ctx->gcm_len_block); rc = -ENOMEM; 
goto aead_map_failure; @@ -1030,7 +1030,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) AES_BLOCK_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "Mapping gcm_iv_inc1 %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping gcm_iv_inc1 %u B at va=%p for DMA failed\n", AES_BLOCK_SIZE, (areq_ctx->gcm_iv_inc1)); areq_ctx->gcm_iv_inc1_dma_addr = 0; rc = -ENOMEM; @@ -1042,7 +1042,7 @@ int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) AES_BLOCK_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma_addr)) { - dev_err(dev, "Mapping gcm_iv_inc2 %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping gcm_iv_inc2 %u B at va=%p for DMA failed\n", AES_BLOCK_SIZE, (areq_ctx->gcm_iv_inc2)); areq_ctx->gcm_iv_inc2_dma_addr = 0; rc = -ENOMEM; @@ -1152,7 +1152,7 @@ int cc_map_hash_request_final(struct cc_drvdata *drvdata, void *ctx, u32 dummy = 0; u32 mapped_nents = 0; - dev_dbg(dev, "final params : curr_buff=%pK curr_buff_cnt=0x%X nbytes = 0x%X src=%pK curr_index=%u\n", + dev_dbg(dev, "final params : curr_buff=%p curr_buff_cnt=0x%X nbytes = 0x%X src=%p curr_index=%u\n", curr_buff, *curr_buff_cnt, nbytes, src, areq_ctx->buff_index); /* Init the type of the dma buffer */ areq_ctx->data_dma_buf_type = CC_DMA_BUF_NULL; @@ -1236,7 +1236,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, u32 dummy = 0; u32 mapped_nents = 0; - dev_dbg(dev, " update params : curr_buff=%pK curr_buff_cnt=0x%X nbytes=0x%X src=%pK curr_index=%u\n", + dev_dbg(dev, " update params : curr_buff=%p curr_buff_cnt=0x%X nbytes=0x%X src=%p curr_index=%u\n", curr_buff, *curr_buff_cnt, nbytes, src, areq_ctx->buff_index); /* Init the type of the dma buffer */ areq_ctx->data_dma_buf_type = CC_DMA_BUF_NULL; @@ -1246,7 +1246,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, areq_ctx->in_nents = 0; if (total_in_len < block_size) { - dev_dbg(dev, " less than one block: curr_buff=%pK *curr_buff_cnt=0x%X copy_to=%pK\n", + dev_dbg(dev, " less than one block: curr_buff=%p *curr_buff_cnt=0x%X copy_to=%p\n", curr_buff, *curr_buff_cnt, &curr_buff[*curr_buff_cnt]); areq_ctx->in_nents = sg_nents_for_len(src, nbytes); sg_copy_to_buffer(src, areq_ctx->in_nents, @@ -1265,7 +1265,7 @@ int cc_map_hash_request_update(struct cc_drvdata *drvdata, void *ctx, /* Copy the new residue to next buffer */ if (*next_buff_cnt) { - dev_dbg(dev, " handle residue: next buff %pK skip data %u residue %u\n", + dev_dbg(dev, " handle residue: next buff %p skip data %u residue %u\n", next_buff, (update_data_len - *curr_buff_cnt), *next_buff_cnt); cc_copy_sg_portion(dev, next_buff, src, @@ -1338,7 +1338,7 @@ void cc_unmap_hash_request(struct device *dev, void *ctx, *allocated and should be released */ if (areq_ctx->mlli_params.curr_pool) { - dev_dbg(dev, "free MLLI buffer: dma=%pad virt=%pK\n", + dev_dbg(dev, "free MLLI buffer: dma=%pad virt=%p\n", &areq_ctx->mlli_params.mlli_dma_addr, areq_ctx->mlli_params.mlli_virt_addr); dma_pool_free(areq_ctx->mlli_params.curr_pool, @@ -1347,14 +1347,14 @@ void cc_unmap_hash_request(struct device *dev, void *ctx, } if (src && areq_ctx->in_nents) { - dev_dbg(dev, "Unmapped sg src: virt=%pK dma=%pad len=0x%X\n", + dev_dbg(dev, "Unmapped sg src: virt=%p dma=%pad len=0x%X\n", sg_virt(src), &sg_dma_address(src), sg_dma_len(src)); dma_unmap_sg(dev, src, areq_ctx->in_nents, DMA_TO_DEVICE); } if (*prev_len) { - dev_dbg(dev, "Unmapped buffer: areq_ctx->buff_sg=%pK dma=%pad len 0x%X\n", + dev_dbg(dev, "Unmapped buffer: 
areq_ctx->buff_sg=%p dma=%pad len 0x%X\n", sg_virt(areq_ctx->buff_sg), &sg_dma_address(areq_ctx->buff_sg), sg_dma_len(areq_ctx->buff_sg)); diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index d39c067672fd..e2cbfdf7a0e4 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -211,11 +211,11 @@ static int cc_cipher_init(struct crypto_tfm *tfm) max_key_buf_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, ctx_p->user.key_dma_addr)) { - dev_err(dev, "Mapping Key %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping Key %u B at va=%p for DMA failed\n", max_key_buf_size, ctx_p->user.key); goto free_key; } - dev_dbg(dev, "Mapped key %u B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped key %u B at va=%p to dma=%pad\n", max_key_buf_size, ctx_p->user.key, &ctx_p->user.key_dma_addr); return 0; diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index d0612bec4d58..c6d085c8ff79 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -125,7 +125,7 @@ static int cc_map_result(struct device *dev, struct ahash_req_ctx *state, digestsize); return -ENOMEM; } - dev_dbg(dev, "Mapped digest result buffer %u B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped digest result buffer %u B at va=%p to dma=%pad\n", digestsize, state->digest_result_buff, &state->digest_result_dma_addr); @@ -184,11 +184,11 @@ static int cc_map_req(struct device *dev, struct ahash_req_ctx *state, dma_map_single(dev, state->digest_buff, ctx->inter_digestsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, state->digest_buff_dma_addr)) { - dev_err(dev, "Mapping digest len %d B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping digest len %d B at va=%p for DMA failed\n", ctx->inter_digestsize, state->digest_buff); return -EINVAL; } - dev_dbg(dev, "Mapped digest %d B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped digest %d B at va=%p to dma=%pad\n", ctx->inter_digestsize, state->digest_buff, &state->digest_buff_dma_addr); @@ -197,11 +197,11 @@ static int cc_map_req(struct device *dev, struct ahash_req_ctx *state, dma_map_single(dev, state->digest_bytes_len, HASH_MAX_LEN_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, state->digest_bytes_len_dma_addr)) { - dev_err(dev, "Mapping digest len %u B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping digest len %u B at va=%p for DMA failed\n", HASH_MAX_LEN_SIZE, state->digest_bytes_len); goto unmap_digest_buf; } - dev_dbg(dev, "Mapped digest len %u B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped digest len %u B at va=%p to dma=%pad\n", HASH_MAX_LEN_SIZE, state->digest_bytes_len, &state->digest_bytes_len_dma_addr); } @@ -212,12 +212,12 @@ static int cc_map_req(struct device *dev, struct ahash_req_ctx *state, ctx->inter_digestsize, DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, state->opad_digest_dma_addr)) { - dev_err(dev, "Mapping opad digest %d B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping opad digest %d B at va=%p for DMA failed\n", ctx->inter_digestsize, state->opad_digest_buff); goto unmap_digest_len; } - dev_dbg(dev, "Mapped opad digest %d B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped opad digest %d B at va=%p to dma=%pad\n", ctx->inter_digestsize, state->opad_digest_buff, &state->opad_digest_dma_addr); } @@ -272,7 +272,7 @@ static void cc_unmap_result(struct device *dev, struct ahash_req_ctx *state, if (state->digest_result_dma_addr) { dma_unmap_single(dev, state->digest_result_dma_addr, digestsize, DMA_BIDIRECTIONAL); - dev_dbg(dev, "unmpa 
digest result buffer va (%pK) pa (%pad) len %u\n", + dev_dbg(dev, "unmpa digest result buffer va (%p) pa (%pad) len %u\n", state->digest_result_buff, &state->digest_result_dma_addr, digestsize); memcpy(result, state->digest_result_buff, digestsize); @@ -287,7 +287,7 @@ static void cc_update_complete(struct device *dev, void *cc_req, int err) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); - dev_dbg(dev, "req=%pK\n", req); + dev_dbg(dev, "req=%p\n", req); if (err != -EINPROGRESS) { /* Not a BACKLOG notification */ @@ -306,7 +306,7 @@ static void cc_digest_complete(struct device *dev, void *cc_req, int err) struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); - dev_dbg(dev, "req=%pK\n", req); + dev_dbg(dev, "req=%p\n", req); if (err != -EINPROGRESS) { /* Not a BACKLOG notification */ @@ -326,7 +326,7 @@ static void cc_hash_complete(struct device *dev, void *cc_req, int err) struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); - dev_dbg(dev, "req=%pK\n", req); + dev_dbg(dev, "req=%p\n", req); if (err != -EINPROGRESS) { /* Not a BACKLOG notification */ @@ -1077,11 +1077,11 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) dma_map_single(dev, ctx->digest_buff, sizeof(ctx->digest_buff), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->digest_buff_dma_addr)) { - dev_err(dev, "Mapping digest len %zu B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping digest len %zu B at va=%p for DMA failed\n", sizeof(ctx->digest_buff), ctx->digest_buff); goto fail; } - dev_dbg(dev, "Mapped digest %zu B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped digest %zu B at va=%p to dma=%pad\n", sizeof(ctx->digest_buff), ctx->digest_buff, &ctx->digest_buff_dma_addr); @@ -1090,12 +1090,12 @@ static int cc_alloc_ctx(struct cc_hash_ctx *ctx) sizeof(ctx->opad_tmp_keys_buff), DMA_BIDIRECTIONAL); if (dma_mapping_error(dev, ctx->opad_tmp_keys_dma_addr)) { - dev_err(dev, "Mapping opad digest %zu B at va=%pK for DMA failed\n", + dev_err(dev, "Mapping opad digest %zu B at va=%p for DMA failed\n", sizeof(ctx->opad_tmp_keys_buff), ctx->opad_tmp_keys_buff); goto fail; } - dev_dbg(dev, "Mapped opad_tmp_keys %zu B at va=%pK to dma=%pad\n", + dev_dbg(dev, "Mapped opad_tmp_keys %zu B at va=%p to dma=%pad\n", sizeof(ctx->opad_tmp_keys_buff), ctx->opad_tmp_keys_buff, &ctx->opad_tmp_keys_dma_addr); From 2566de3e06a35b6517bcab11fd25b65ef90fd180 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 18 Jun 2025 18:00:26 +0800 Subject: [PATCH 060/116] crypto: hisilicon - Use fine grained DMA mapping direction The following splat was triggered when booting the kernel built with arm64's defconfig + CRYPTO_SELFTESTS + DMA_API_DEBUG. 
------------[ cut here ]------------ DMA-API: hisi_sec2 0000:75:00.0: cacheline tracking EEXIST, overlapping mappings aren't supported WARNING: CPU: 24 PID: 1273 at kernel/dma/debug.c:596 add_dma_entry+0x248/0x308 Call trace: add_dma_entry+0x248/0x308 (P) debug_dma_map_sg+0x208/0x3e4 __dma_map_sg_attrs+0xbc/0x118 dma_map_sg_attrs+0x10/0x24 hisi_acc_sg_buf_map_to_hw_sgl+0x80/0x218 [hisi_qm] sec_cipher_map+0xc4/0x338 [hisi_sec2] sec_aead_sgl_map+0x18/0x24 [hisi_sec2] sec_process+0xb8/0x36c [hisi_sec2] sec_aead_crypto+0xe4/0x264 [hisi_sec2] sec_aead_encrypt+0x14/0x20 [hisi_sec2] crypto_aead_encrypt+0x24/0x38 test_aead_vec_cfg+0x480/0x7e4 test_aead_vec+0x84/0x1b8 alg_test_aead+0xc0/0x498 alg_test.part.0+0x518/0x524 alg_test+0x20/0x64 cryptomgr_test+0x24/0x44 kthread+0x130/0x1fc ret_from_fork+0x10/0x20 ---[ end trace 0000000000000000 ]--- DMA-API: Mapped at: debug_dma_map_sg+0x234/0x3e4 __dma_map_sg_attrs+0xbc/0x118 dma_map_sg_attrs+0x10/0x24 hisi_acc_sg_buf_map_to_hw_sgl+0x80/0x218 [hisi_qm] sec_cipher_map+0xc4/0x338 [hisi_sec2] This occurs in selftests where the input and the output scatterlist point to the same underlying memory (e.g., when tested with INPLACE_TWO_SGLISTS mode). The problem is that the hisi_sec2 driver maps these two different scatterlists using the DMA_BIDIRECTIONAL flag which leads to overlapped write mappings which are not supported by the DMA layer. Fix it by using the fine grained and correct DMA mapping directions. While at it, switch the DMA directions used by the hisi_zip driver too. Signed-off-by: Zenghui Yu Reviewed-by: Longfang Liu Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec_crypto.c | 21 +++++++++++++-------- drivers/crypto/hisilicon/sgl.c | 15 ++++++++------- drivers/crypto/hisilicon/zip/zip_crypto.c | 13 +++++++------ include/linux/hisi_acc_qm.h | 4 ++-- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 8ea5305bc320..7d04e770a8c2 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -965,6 +965,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, struct sec_qp_ctx *qp_ctx = req->qp_ctx; struct sec_alg_res *res = &qp_ctx->res[req->req_id]; struct device *dev = ctx->dev; + enum dma_data_direction src_direction; int ret; if (req->use_pbuf) { @@ -990,10 +991,11 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, a_req->out_mac_dma = res->out_mac_dma; } + src_direction = dst == src ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; req->in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src, qp_ctx->c_in_pool, req->req_id, - &req->in_dma); + &req->in_dma, src_direction); if (IS_ERR(req->in)) { dev_err(dev, "fail to dma map input sgl buffers!\n"); return PTR_ERR(req->in); @@ -1003,7 +1005,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, ret = sec_aead_mac_init(a_req); if (unlikely(ret)) { dev_err(dev, "fail to init mac data for ICV!\n"); - hisi_acc_sg_buf_unmap(dev, src, req->in); + hisi_acc_sg_buf_unmap(dev, src, req->in, src_direction); return ret; } } @@ -1015,11 +1017,12 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, c_req->c_out = hisi_acc_sg_buf_map_to_hw_sgl(dev, dst, qp_ctx->c_out_pool, req->req_id, - &c_req->c_out_dma); + &c_req->c_out_dma, + DMA_FROM_DEVICE); if (IS_ERR(c_req->c_out)) { dev_err(dev, "fail to dma map output sgl buffers!\n"); - hisi_acc_sg_buf_unmap(dev, src, req->in); + hisi_acc_sg_buf_unmap(dev, src, req->in, src_direction); return PTR_ERR(c_req->c_out); } } @@ -1036,10 +1039,12 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, if (req->use_pbuf) { sec_cipher_pbuf_unmap(ctx, req, dst); } else { - if (dst != src) - hisi_acc_sg_buf_unmap(dev, src, req->in); - - hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out); + if (dst != src) { + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out, DMA_FROM_DEVICE); + hisi_acc_sg_buf_unmap(dev, src, req->in, DMA_TO_DEVICE); + } else { + hisi_acc_sg_buf_unmap(dev, src, req->in, DMA_BIDIRECTIONAL); + } } } diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index c974f95cd126..7a9ef2a9972a 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -210,15 +210,15 @@ static void clear_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl) * @pool: Pool which hw sgl memory will be allocated in. * @index: Index of hisi_acc_hw_sgl in pool. * @hw_sgl_dma: The dma address of allocated hw sgl. + * @dir: DMA direction. * * This function builds hw sgl according input sgl, user can use hw_sgl_dma * as src/dst in its BD. Only support single hw sgl currently. */ struct hisi_acc_hw_sgl * -hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, - struct scatterlist *sgl, - struct hisi_acc_sgl_pool *pool, - u32 index, dma_addr_t *hw_sgl_dma) +hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sgl, + struct hisi_acc_sgl_pool *pool, u32 index, + dma_addr_t *hw_sgl_dma, enum dma_data_direction dir) { struct hisi_acc_hw_sgl *curr_hw_sgl; unsigned int i, sg_n_mapped; @@ -232,7 +232,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, sg_n = sg_nents(sgl); - sg_n_mapped = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL); + sg_n_mapped = dma_map_sg(dev, sgl, sg_n, dir); if (!sg_n_mapped) { dev_err(dev, "DMA mapping for SG error!\n"); return ERR_PTR(-EINVAL); @@ -276,16 +276,17 @@ EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl); * @dev: The device which hw sgl belongs to. * @sgl: Related scatterlist. * @hw_sgl: Virtual address of hw sgl. + * @dir: DMA direction. * * This function unmaps allocated hw sgl. 
*/ void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl, - struct hisi_acc_hw_sgl *hw_sgl) + struct hisi_acc_hw_sgl *hw_sgl, enum dma_data_direction dir) { if (!dev || !sgl || !hw_sgl) return; - dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL); + dma_unmap_sg(dev, sgl, sg_nents(sgl), dir); clear_hw_sgl_sge(hw_sgl); hw_sgl->entry_sum_in_chain = 0; hw_sgl->entry_sum_in_sgl = 0; diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index 7327f8f29b01..b97513981a3b 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -224,7 +224,8 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, return -EINVAL; req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->src, pool, - req->req_id << 1, &req->dma_src); + req->req_id << 1, &req->dma_src, + DMA_TO_DEVICE); if (IS_ERR(req->hw_src)) { dev_err(dev, "failed to map the src buffer to hw sgl (%ld)!\n", PTR_ERR(req->hw_src)); @@ -233,7 +234,7 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, a_req->dst, pool, (req->req_id << 1) + 1, - &req->dma_dst); + &req->dma_dst, DMA_FROM_DEVICE); if (IS_ERR(req->hw_dst)) { ret = PTR_ERR(req->hw_dst); dev_err(dev, "failed to map the dst buffer to hw slg (%d)!\n", @@ -258,9 +259,9 @@ static int hisi_zip_do_work(struct hisi_zip_qp_ctx *qp_ctx, return -EINPROGRESS; err_unmap_output: - hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst); + hisi_acc_sg_buf_unmap(dev, a_req->dst, req->hw_dst, DMA_FROM_DEVICE); err_unmap_input: - hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src); + hisi_acc_sg_buf_unmap(dev, a_req->src, req->hw_src, DMA_TO_DEVICE); return ret; } @@ -303,8 +304,8 @@ static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data) err = -EIO; } - hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src); - hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst); + hisi_acc_sg_buf_unmap(dev, acomp_req->dst, req->hw_dst, DMA_FROM_DEVICE); + hisi_acc_sg_buf_unmap(dev, acomp_req->src, req->hw_src, DMA_TO_DEVICE); acomp_req->dlen = ops->get_dstlen(sqe); diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 99fcf65d575f..0c4c84b8c3be 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -556,9 +556,9 @@ int hisi_qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue, struct hisi_acc_sgl_pool; struct hisi_acc_hw_sgl *hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sgl, struct hisi_acc_sgl_pool *pool, - u32 index, dma_addr_t *hw_sgl_dma); + u32 index, dma_addr_t *hw_sgl_dma, enum dma_data_direction dir); void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl, - struct hisi_acc_hw_sgl *hw_sgl); + struct hisi_acc_hw_sgl *hw_sgl, enum dma_data_direction dir); struct hisi_acc_sgl_pool *hisi_acc_create_sgl_pool(struct device *dev, u32 count, u32 sge_nr); void hisi_acc_free_sgl_pool(struct device *dev, From fa13f1d7c71423a53234c03771fde47f0743a2ef Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 23 Jun 2025 19:17:27 +0800 Subject: [PATCH 061/116] crypto: aspeed - Fix hash fallback path typo Fix typo in the fallback code path. 
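The fallback update path must not only call crypto_ahash_update() but
also export the fallback's core state and import it back into the
request context. The fix below chains these steps with the GNU C
"a ?: b" idiom; as a minimal sketch with a hypothetical int-returning
step():

	ret = ret ?: step();	/* step() runs only while ret is still 0 */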
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202506231830.us4hiwlZ-lkp@intel.com/ Signed-off-by: Herbert Xu --- drivers/crypto/aspeed/aspeed-hace-hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/aspeed/aspeed-hace-hash.c b/drivers/crypto/aspeed/aspeed-hace-hash.c index e54b7dd03be3..f8f37c9d5f3c 100644 --- a/drivers/crypto/aspeed/aspeed-hace-hash.c +++ b/drivers/crypto/aspeed/aspeed-hace-hash.c @@ -449,7 +449,7 @@ static noinline int aspeed_ahash_fallback(struct ahash_request *req) if (rctx->flags & SHA_FLAGS_FINUP) ret = ret ?: crypto_ahash_finup(fbreq); else - ret = ret ?: crypto_ahash_update(fbreq); + ret = ret ?: crypto_ahash_update(fbreq) ?: crypto_ahash_export_core(fbreq, state) ?: aspeed_sham_import(req, state); HASH_REQUEST_ZERO(fbreq); From 2f8839e6c5f8e200629fd730aac5dd874c1d2544 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Tue, 17 Jun 2025 15:44:35 +0200 Subject: [PATCH 062/116] crypto: ahash - make hash walk functions from ahash.c public Make the hash walk functions crypto_hash_walk_done() crypto_hash_walk_first() crypto_hash_walk_last() public again. These functions had been removed from the header file include/crypto/internal/hash.h with commit 7fa481734016 ("crypto: ahash - make hash walk functions private to ahash.c") as there was no crypto algorithm code using them. With the upcoming crypto implementation for s390 phmac these functions will be exploited and thus need to be public within the kernel again. Signed-off-by: Harald Freudenberger Acked-by: Holger Dengler Signed-off-by: Herbert Xu --- crypto/ahash.c | 26 +++++--------------------- include/crypto/internal/hash.h | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 7a51e0cf9322..00a5e36f710b 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -29,19 +29,6 @@ #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e -struct crypto_hash_walk { - const char *data; - - unsigned int offset; - unsigned int flags; - - struct page *pg; - unsigned int entrylen; - - unsigned int total; - struct scatterlist *sg; -}; - static int ahash_def_finup(struct ahash_request *req); static inline bool crypto_ahash_block_only(struct crypto_ahash *tfm) @@ -112,8 +99,8 @@ static int hash_walk_new_entry(struct crypto_hash_walk *walk) return hash_walk_next(walk); } -static int crypto_hash_walk_first(struct ahash_request *req, - struct crypto_hash_walk *walk) +int crypto_hash_walk_first(struct ahash_request *req, + struct crypto_hash_walk *walk) { walk->total = req->nbytes; walk->entrylen = 0; @@ -133,8 +120,9 @@ static int crypto_hash_walk_first(struct ahash_request *req, return hash_walk_new_entry(walk); } +EXPORT_SYMBOL_GPL(crypto_hash_walk_first); -static int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) +int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) { if ((walk->flags & CRYPTO_AHASH_REQ_VIRT)) return err; @@ -160,11 +148,7 @@ static int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) return hash_walk_new_entry(walk); } - -static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) -{ - return !(walk->entrylen | walk->total); -} +EXPORT_SYMBOL_GPL(crypto_hash_walk_done); /* * For an ahash tfm that is using an shash algorithm (instead of an ahash diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f052afa6e7b0..2456d6ea73f0 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h 
@@ -30,6 +30,20 @@
 	__##name##_req, (req))
 
 struct ahash_request;
+struct scatterlist;
+
+struct crypto_hash_walk {
+	const char *data;
+
+	unsigned int offset;
+	unsigned int flags;
+
+	struct page *pg;
+	unsigned int entrylen;
+
+	unsigned int total;
+	struct scatterlist *sg;
+};
 
 struct ahash_instance {
 	void (*free)(struct ahash_instance *inst);
@@ -61,6 +75,15 @@ struct crypto_shash_spawn {
 	struct crypto_spawn base;
 };
 
+int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err);
+int crypto_hash_walk_first(struct ahash_request *req,
+			   struct crypto_hash_walk *walk);
+
+static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk)
+{
+	return !(walk->entrylen | walk->total);
+}
+
 int crypto_register_ahash(struct ahash_alg *alg);
 void crypto_unregister_ahash(struct ahash_alg *alg);
 int crypto_register_ahashes(struct ahash_alg *algs, int count);

From 86ca5cb84fc339a08dfd673a50c6f3bea1b2f956 Mon Sep 17 00:00:00 2001
From: Holger Dengler
Date: Tue, 17 Jun 2025 15:44:36 +0200
Subject: [PATCH 063/116] s390/crypto: Add protected key hmac subfunctions for
 KMAC

The CPACF KMAC instruction supports new subfunctions for protected key
hmac. Add defines for these 4 new subfunctions.

Signed-off-by: Holger Dengler
Signed-off-by: Harald Freudenberger
Reviewed-by: Holger Dengler
Signed-off-by: Herbert Xu
---
 arch/s390/include/asm/cpacf.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 54cb97603ec0..4bc5317fbb12 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -129,6 +129,10 @@
 #define CPACF_KMAC_HMAC_SHA_256	0x71
 #define CPACF_KMAC_HMAC_SHA_384	0x72
 #define CPACF_KMAC_HMAC_SHA_512	0x73
+#define CPACF_KMAC_PHMAC_SHA_224	0x78
+#define CPACF_KMAC_PHMAC_SHA_256	0x79
+#define CPACF_KMAC_PHMAC_SHA_384	0x7a
+#define CPACF_KMAC_PHMAC_SHA_512	0x7b
 
 /*
  * Function codes for the PCKMO (PERFORM CRYPTOGRAPHIC KEY MANAGEMENT)

From cbbc675506cc4cd93e6f895e7c1f693156f9a95c Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Tue, 17 Jun 2025 15:44:37 +0200
Subject: [PATCH 064/116] crypto: s390 - New s390 specific protected key hash
 phmac

Add support for protected key hmac ("phmac") for s390 arch.

With the latest machine generation there is now support for protected
key (that is a key wrapped by a master key stored in firmware) hmac
for sha2 (sha224, sha256, sha384 and sha512) for the s390 specific
CPACF instruction kmac.

This patch adds support via 4 new ahashes registered as phmac(sha224),
phmac(sha256), phmac(sha384) and phmac(sha512).
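From a user's perspective these behave like any other keyed ahash. A
minimal allocation sketch through the generic crypto API (key and
keylen assumed to be supplied by the caller):

	struct crypto_ahash *tfm;
	int err;

	tfm = crypto_alloc_ahash("phmac(sha256)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	/* setkey triggers conversion of the raw key into a protected key */
	err = crypto_ahash_setkey(tfm, key, keylen);
	if (err)
		goto out;
	/* drive the tfm with ahash requests as usual ... */
out:
	crypto_free_ahash(tfm);
	return err;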
Co-developed-by: Holger Dengler Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Signed-off-by: Holger Dengler Signed-off-by: Herbert Xu --- arch/s390/configs/debug_defconfig | 1 + arch/s390/configs/defconfig | 1 + arch/s390/crypto/Makefile | 1 + arch/s390/crypto/phmac_s390.c | 911 ++++++++++++++++++++++++++++++ drivers/crypto/Kconfig | 13 + 5 files changed, 927 insertions(+) create mode 100644 arch/s390/crypto/phmac_s390.c diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 8ecad727497e..e91fe6c7832e 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -819,6 +819,7 @@ CONFIG_PKEY_EP11=m CONFIG_PKEY_PCKMO=m CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m +CONFIG_CRYPTO_PHMAC_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CRYPTO_KRB5=m diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index c13a77765162..fae834beff57 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -806,6 +806,7 @@ CONFIG_PKEY_EP11=m CONFIG_PKEY_PCKMO=m CONFIG_PKEY_UV=m CONFIG_CRYPTO_PAES_S390=m +CONFIG_CRYPTO_PHMAC_S390=m CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_CRYPTO_KRB5=m diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index 21757d86cd49..6d1f1df46036 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -13,4 +13,5 @@ obj-$(CONFIG_CRYPTO_PAES_S390) += paes_s390.o obj-$(CONFIG_S390_PRNG) += prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) += ghash_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) += hmac_s390.o +obj-$(CONFIG_CRYPTO_PHMAC_S390) += phmac_s390.o obj-y += arch_random.o diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c new file mode 100644 index 000000000000..8473af47f5b9 --- /dev/null +++ b/arch/s390/crypto/phmac_s390.c @@ -0,0 +1,911 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright IBM Corp. 2025 + * + * s390 specific HMAC support for protected keys. + */ + +#define KMSG_COMPONENT "phmac_s390" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct crypto_engine *phmac_crypto_engine; +#define MAX_QLEN 10 + +/* + * A simple hash walk helper + */ + +struct hash_walk_helper { + struct crypto_hash_walk walk; + const u8 *walkaddr; + int walkbytes; +}; + +/* + * Prepare hash walk helper. + * Set up the base hash walk, fill walkaddr and walkbytes. + * Returns 0 on success or negative value on error. + */ +static inline int hwh_prepare(struct ahash_request *req, + struct hash_walk_helper *hwh) +{ + hwh->walkbytes = crypto_hash_walk_first(req, &hwh->walk); + if (hwh->walkbytes < 0) + return hwh->walkbytes; + hwh->walkaddr = hwh->walk.data; + return 0; +} + +/* + * Advance hash walk helper by n bytes. + * Progress the walkbytes and walkaddr fields by n bytes. + * If walkbytes is then 0, pull next hunk from hash walk + * and update walkbytes and walkaddr. + * If n is negative, unmap hash walk and return error. + * Returns 0 on success or negative value on error. 
+ */ +static inline int hwh_advance(struct hash_walk_helper *hwh, int n) +{ + if (n < 0) + return crypto_hash_walk_done(&hwh->walk, n); + + hwh->walkbytes -= n; + hwh->walkaddr += n; + if (hwh->walkbytes > 0) + return 0; + + hwh->walkbytes = crypto_hash_walk_done(&hwh->walk, 0); + if (hwh->walkbytes < 0) + return hwh->walkbytes; + + hwh->walkaddr = hwh->walk.data; + return 0; +} + +/* + * KMAC param block layout for sha2 function codes: + * The layout of the param block for the KMAC instruction depends on the + * blocksize of the used hashing sha2-algorithm function codes. The param block + * contains the hash chaining value (cv), the input message bit-length (imbl) + * and the hmac-secret (key). To prevent code duplication, the sizes of all + * these are calculated based on the blocksize. + * + * param-block: + * +-------+ + * | cv | + * +-------+ + * | imbl | + * +-------+ + * | key | + * +-------+ + * + * sizes: + * part | sh2-alg | calculation | size | type + * -----+---------+-------------+------+-------- + * cv | 224/256 | blocksize/2 | 32 | u64[8] + * | 384/512 | | 64 | u128[8] + * imbl | 224/256 | blocksize/8 | 8 | u64 + * | 384/512 | | 16 | u128 + * key | 224/256 | blocksize | 96 | u8[96] + * | 384/512 | | 160 | u8[160] + */ + +#define MAX_DIGEST_SIZE SHA512_DIGEST_SIZE +#define MAX_IMBL_SIZE sizeof(u128) +#define MAX_BLOCK_SIZE SHA512_BLOCK_SIZE + +#define SHA2_CV_SIZE(bs) ((bs) >> 1) +#define SHA2_IMBL_SIZE(bs) ((bs) >> 3) + +#define SHA2_IMBL_OFFSET(bs) (SHA2_CV_SIZE(bs)) +#define SHA2_KEY_OFFSET(bs) (SHA2_CV_SIZE(bs) + SHA2_IMBL_SIZE(bs)) + +#define PHMAC_MAX_KEYSIZE 256 +#define PHMAC_SHA256_PK_SIZE (SHA256_BLOCK_SIZE + 32) +#define PHMAC_SHA512_PK_SIZE (SHA512_BLOCK_SIZE + 32) +#define PHMAC_MAX_PK_SIZE PHMAC_SHA512_PK_SIZE + +/* phmac protected key struct */ +struct phmac_protkey { + u32 type; + u32 len; + u8 protkey[PHMAC_MAX_PK_SIZE]; +}; + +#define PK_STATE_NO_KEY 0 +#define PK_STATE_CONVERT_IN_PROGRESS 1 +#define PK_STATE_VALID 2 + +/* phmac tfm context */ +struct phmac_tfm_ctx { + /* source key material used to derive a protected key from */ + u8 keybuf[PHMAC_MAX_KEYSIZE]; + unsigned int keylen; + + /* cpacf function code to use with this protected key type */ + long fc; + + /* nr of requests enqueued via crypto engine which use this tfm ctx */ + atomic_t via_engine_ctr; + + /* spinlock to atomic read/update all the following fields */ + spinlock_t pk_lock; + + /* see PK_STATE* defines above, < 0 holds convert failure rc */ + int pk_state; + /* if state is valid, pk holds the protected key */ + struct phmac_protkey pk; +}; + +union kmac_gr0 { + unsigned long reg; + struct { + unsigned long : 48; + unsigned long ikp : 1; + unsigned long iimp : 1; + unsigned long ccup : 1; + unsigned long : 6; + unsigned long fc : 7; + }; +}; + +struct kmac_sha2_ctx { + u8 param[MAX_DIGEST_SIZE + MAX_IMBL_SIZE + PHMAC_MAX_PK_SIZE]; + union kmac_gr0 gr0; + u8 buf[MAX_BLOCK_SIZE]; + u64 buflen[2]; +}; + +/* phmac request context */ +struct phmac_req_ctx { + struct hash_walk_helper hwh; + struct kmac_sha2_ctx kmac_ctx; + bool final; +}; + +/* + * phmac_tfm_ctx_setkey() - Set key value into tfm context, maybe construct + * a clear key token digestible by pkey from a clear key value. 
+ */ +static inline int phmac_tfm_ctx_setkey(struct phmac_tfm_ctx *tfm_ctx, + const u8 *key, unsigned int keylen) +{ + if (keylen > sizeof(tfm_ctx->keybuf)) + return -EINVAL; + + memcpy(tfm_ctx->keybuf, key, keylen); + tfm_ctx->keylen = keylen; + + return 0; +} + +/* + * Convert the raw key material into a protected key via PKEY api. + * This function may sleep - don't call in non-sleeping context. + */ +static inline int convert_key(const u8 *key, unsigned int keylen, + struct phmac_protkey *pk) +{ + int rc, i; + + pk->len = sizeof(pk->protkey); + + /* + * In case of a busy card retry with increasing delay + * of 200, 400, 800 and 1600 ms - in total 3 s. + */ + for (rc = -EIO, i = 0; rc && i < 5; i++) { + if (rc == -EBUSY && msleep_interruptible((1 << i) * 100)) { + rc = -EINTR; + goto out; + } + rc = pkey_key2protkey(key, keylen, + pk->protkey, &pk->len, &pk->type, + PKEY_XFLAG_NOMEMALLOC); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +/* + * (Re-)Convert the raw key material from the tfm ctx into a protected + * key via convert_key() function. Update the pk_state, pk_type, pk_len + * and the protected key in the tfm context. + * Please note this function may be invoked concurrently with the very + * same tfm context. The pk_lock spinlock in the context ensures an + * atomic update of the pk and the pk state but does not guarantee any + * order of update. So a fresh converted valid protected key may get + * updated with an 'old' expired key value. As the cpacf instructions + * detect this, refuse to operate with an invalid key and the calling + * code triggers a (re-)conversion this does no harm. This may lead to + * unnecessary additional conversion but never to invalid data on the + * hash operation. + */ +static int phmac_convert_key(struct phmac_tfm_ctx *tfm_ctx) +{ + struct phmac_protkey pk; + int rc; + + spin_lock_bh(&tfm_ctx->pk_lock); + tfm_ctx->pk_state = PK_STATE_CONVERT_IN_PROGRESS; + spin_unlock_bh(&tfm_ctx->pk_lock); + + rc = convert_key(tfm_ctx->keybuf, tfm_ctx->keylen, &pk); + + /* update context */ + spin_lock_bh(&tfm_ctx->pk_lock); + if (rc) { + tfm_ctx->pk_state = rc; + } else { + tfm_ctx->pk_state = PK_STATE_VALID; + tfm_ctx->pk = pk; + } + spin_unlock_bh(&tfm_ctx->pk_lock); + + memzero_explicit(&pk, sizeof(pk)); + pr_debug("rc=%d\n", rc); + return rc; +} + +/* + * kmac_sha2_set_imbl - sets the input message bit-length based on the blocksize + */ +static inline void kmac_sha2_set_imbl(u8 *param, u64 buflen_lo, + u64 buflen_hi, unsigned int blocksize) +{ + u8 *imbl = param + SHA2_IMBL_OFFSET(blocksize); + + switch (blocksize) { + case SHA256_BLOCK_SIZE: + *(u64 *)imbl = buflen_lo * BITS_PER_BYTE; + break; + case SHA512_BLOCK_SIZE: + *(u128 *)imbl = (((u128)buflen_hi << 64) + buflen_lo) << 3; + break; + default: + break; + } +} + +static int phmac_kmac_update(struct ahash_request *req, bool maysleep) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + unsigned int bs = crypto_ahash_blocksize(tfm); + unsigned int offset, k, n; + int rc = 0; + + /* + * The walk is always mapped when this function is called. + * Note that in case of partial processing or failure the walk + * is NOT unmapped here. So a follow up task may reuse the walk + * or in case of unrecoverable failure needs to unmap it. 
+ */ + + while (hwh->walkbytes > 0) { + /* check sha2 context buffer */ + offset = ctx->buflen[0] % bs; + if (offset + hwh->walkbytes < bs) + goto store; + + if (offset) { + /* fill ctx buffer up to blocksize and process this block */ + n = bs - offset; + memcpy(ctx->buf + offset, hwh->walkaddr, n); + ctx->gr0.iimp = 1; + for (;;) { + k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, bs); + if (likely(k == bs)) + break; + if (unlikely(k > 0)) { + /* + * Can't deal with hunks smaller than blocksize. + * And kmac should always return the nr of + * processed bytes as 0 or a multiple of the + * blocksize. + */ + rc = -EIO; + goto out; + } + /* protected key is invalid and needs re-conversion */ + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + } + ctx->buflen[0] += n; + if (ctx->buflen[0] < n) + ctx->buflen[1]++; + rc = hwh_advance(hwh, n); + if (unlikely(rc)) + goto out; + offset = 0; + } + + /* process as many blocks as possible from the walk */ + while (hwh->walkbytes >= bs) { + n = (hwh->walkbytes / bs) * bs; + ctx->gr0.iimp = 1; + k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, hwh->walkaddr, n); + if (likely(k > 0)) { + ctx->buflen[0] += k; + if (ctx->buflen[0] < k) + ctx->buflen[1]++; + rc = hwh_advance(hwh, k); + if (unlikely(rc)) + goto out; + } + if (unlikely(k < n)) { + /* protected key is invalid and needs re-conversion */ + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + } + } + +store: + /* store incomplete block in context buffer */ + if (hwh->walkbytes) { + memcpy(ctx->buf + offset, hwh->walkaddr, hwh->walkbytes); + ctx->buflen[0] += hwh->walkbytes; + if (ctx->buflen[0] < hwh->walkbytes) + ctx->buflen[1]++; + rc = hwh_advance(hwh, hwh->walkbytes); + if (unlikely(rc)) + goto out; + } + + } /* end of while (hwh->walkbytes > 0) */ + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_kmac_final(struct ahash_request *req, bool maysleep) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + unsigned int ds = crypto_ahash_digestsize(tfm); + unsigned int bs = crypto_ahash_blocksize(tfm); + unsigned int k, n; + int rc = 0; + + n = ctx->buflen[0] % bs; + ctx->gr0.iimp = 0; + kmac_sha2_set_imbl(ctx->param, ctx->buflen[0], ctx->buflen[1], bs); + for (;;) { + k = _cpacf_kmac(&ctx->gr0.reg, ctx->param, ctx->buf, n); + if (likely(k == n)) + break; + if (unlikely(k > 0)) { + /* Can't deal with hunks smaller than blocksize. 
*/ + rc = -EIO; + goto out; + } + /* protected key is invalid and needs re-conversion */ + if (!maysleep) { + rc = -EKEYEXPIRED; + goto out; + } + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + } + + memcpy(req->result, ctx->param, ds); + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + unsigned int bs = crypto_ahash_blocksize(tfm); + int rc = 0; + + /* zero request context (includes the kmac sha2 context) */ + memset(req_ctx, 0, sizeof(*req_ctx)); + + /* + * setkey() should have set a valid fc into the tfm context. + * Copy this function code into the gr0 field of the kmac context. + */ + if (!tfm_ctx->fc) { + rc = -ENOKEY; + goto out; + } + kmac_ctx->gr0.fc = tfm_ctx->fc; + + /* + * Copy the pk from tfm ctx into kmac ctx. The protected key + * may be outdated but update() and final() will handle this. + */ + spin_lock_bh(&tfm_ctx->pk_lock); + memcpy(kmac_ctx->param + SHA2_KEY_OFFSET(bs), + tfm_ctx->pk.protkey, tfm_ctx->pk.len); + spin_unlock_bh(&tfm_ctx->pk_lock); + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_update(struct ahash_request *req) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + int rc; + + /* prep the walk in the request context */ + rc = hwh_prepare(req, hwh); + if (rc) + goto out; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_update(req, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + atomic_inc(&tfm_ctx->via_engine_ctr); + rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&tfm_ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) { + hwh_advance(hwh, rc); + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_final(struct ahash_request *req) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + int rc = 0; + + /* Try synchronous operation if no active engine usage */ + if (!atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_final(req, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. 
+ */ + if (rc == 0 || rc == -EKEYEXPIRED) { + req->nbytes = 0; + req_ctx->final = true; + atomic_inc(&tfm_ctx->via_engine_ctr); + rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&tfm_ctx->via_engine_ctr); + } + +out: + if (rc != -EINPROGRESS) + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_finup(struct ahash_request *req) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx; + struct hash_walk_helper *hwh = &req_ctx->hwh; + int rc; + + /* prep the walk in the request context */ + rc = hwh_prepare(req, hwh); + if (rc) + goto out; + + /* Try synchronous operations if no active engine usage */ + if (!atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_update(req, false); + if (rc == 0) + req->nbytes = 0; + } + if (!rc && !req->nbytes && !atomic_read(&tfm_ctx->via_engine_ctr)) { + rc = phmac_kmac_final(req, false); + if (rc == 0) + goto out; + } + + /* + * If sync operation failed or key expired or there are already + * requests enqueued via engine, fallback to async. Mark tfm as + * using engine to serialize requests. + */ + if (rc == 0 || rc == -EKEYEXPIRED) { + req_ctx->final = true; + atomic_inc(&tfm_ctx->via_engine_ctr); + rc = crypto_transfer_hash_request_to_engine(phmac_crypto_engine, req); + if (rc != -EINPROGRESS) + atomic_dec(&tfm_ctx->via_engine_ctr); + } + + if (rc != -EINPROGRESS) + hwh_advance(hwh, rc); + +out: + if (rc != -EINPROGRESS) + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_digest(struct ahash_request *req) +{ + int rc; + + rc = phmac_init(req); + if (rc) + goto out; + + rc = phmac_finup(req); + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_setkey(struct crypto_ahash *tfm, + const u8 *key, unsigned int keylen) +{ + struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); + unsigned int ds = crypto_ahash_digestsize(tfm); + int rc = 0; + + /* copy raw key into tfm context */ + rc = phmac_tfm_ctx_setkey(tfm_ctx, key, keylen); + if (rc) + goto out; + + /* convert raw key into protected key */ + rc = phmac_convert_key(tfm_ctx); + if (rc) + goto out; + + /* set function code in tfm context, check for valid pk type */ + switch (ds) { + case SHA224_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_224; + break; + case SHA256_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_512) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_256; + break; + case SHA384_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_384; + break; + case SHA512_DIGEST_SIZE: + if (tfm_ctx->pk.type != PKEY_KEYTYPE_HMAC_1024) + rc = -EINVAL; + else + tfm_ctx->fc = CPACF_KMAC_PHMAC_SHA_512; + break; + default: + tfm_ctx->fc = 0; + rc = -EINVAL; + } + +out: + pr_debug("rc=%d\n", rc); + return rc; +} + +static int phmac_export(struct ahash_request *req, void *out) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx; + + memcpy(out, ctx, sizeof(*ctx)); + + return 0; +} + +static int phmac_import(struct ahash_request *req, const void *in) +{ + struct phmac_req_ctx *req_ctx = ahash_request_ctx(req); + struct 
kmac_sha2_ctx *ctx = &req_ctx->kmac_ctx;
+
+	memset(req_ctx, 0, sizeof(*req_ctx));
+	memcpy(ctx, in, sizeof(*ctx));
+
+	return 0;
+}
+
+static int phmac_init_tfm(struct crypto_ahash *tfm)
+{
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+	memset(tfm_ctx, 0, sizeof(*tfm_ctx));
+	spin_lock_init(&tfm_ctx->pk_lock);
+
+	crypto_ahash_set_reqsize(tfm, sizeof(struct phmac_req_ctx));
+
+	return 0;
+}
+
+static void phmac_exit_tfm(struct crypto_ahash *tfm)
+{
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+
+	memzero_explicit(tfm_ctx->keybuf, sizeof(tfm_ctx->keybuf));
+	memzero_explicit(&tfm_ctx->pk, sizeof(tfm_ctx->pk));
+}
+
+static int phmac_do_one_request(struct crypto_engine *engine, void *areq)
+{
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm);
+	struct phmac_req_ctx *req_ctx = ahash_request_ctx(req);
+	struct kmac_sha2_ctx *kmac_ctx = &req_ctx->kmac_ctx;
+	struct hash_walk_helper *hwh = &req_ctx->hwh;
+	int rc = -EINVAL;
+
+	/*
+	 * Three kinds of requests come in here:
+	 * update when req->nbytes > 0 and req_ctx->final is false
+	 * final when req->nbytes = 0 and req_ctx->final is true
+	 * finup when req->nbytes > 0 and req_ctx->final is true
+	 * For update and finup the hwh walk needs to be prepared and
+	 * up to date but the actual nr of bytes in req->nbytes may be
+	 * any non-zero number. For final there is no hwh walk needed.
+	 */
+
+	if (req->nbytes) {
+		rc = phmac_kmac_update(req, true);
+		if (rc == -EKEYEXPIRED) {
+			/*
+			 * Protected key expired, conversion is in process.
+			 * Trigger a re-schedule of this request by returning
+			 * -ENOSPC ("hardware queue full") to the crypto engine.
+			 * To avoid immediate re-invocation of this callback,
+			 * tell the scheduler to voluntarily give up the CPU here.
+			 */
+			pr_debug("rescheduling request\n");
+			cond_resched();
+			return -ENOSPC;
+		} else if (rc) {
+			hwh_advance(hwh, rc);
+			goto out;
+		}
+		req->nbytes = 0;
+	}
+
+	if (req_ctx->final) {
+		rc = phmac_kmac_final(req, true);
+		if (rc == -EKEYEXPIRED) {
+			/*
+			 * Protected key expired, conversion is in process.
+			 * Trigger a re-schedule of this request by returning
+			 * -ENOSPC ("hardware queue full") to the crypto engine.
+			 * To avoid immediate re-invocation of this callback,
+			 * tell the scheduler to voluntarily give up the CPU here.
+ */ + pr_debug("rescheduling request\n"); + cond_resched(); + return -ENOSPC; + } + } + +out: + if (rc || req_ctx->final) + memzero_explicit(kmac_ctx, sizeof(*kmac_ctx)); + pr_debug("request complete with rc=%d\n", rc); + local_bh_disable(); + atomic_dec(&tfm_ctx->via_engine_ctr); + crypto_finalize_hash_request(engine, req, rc); + local_bh_enable(); + return rc; +} + +#define S390_ASYNC_PHMAC_ALG(x) \ +{ \ + .base = { \ + .init = phmac_init, \ + .update = phmac_update, \ + .final = phmac_final, \ + .finup = phmac_finup, \ + .digest = phmac_digest, \ + .setkey = phmac_setkey, \ + .import = phmac_import, \ + .export = phmac_export, \ + .init_tfm = phmac_init_tfm, \ + .exit_tfm = phmac_exit_tfm, \ + .halg = { \ + .digestsize = SHA##x##_DIGEST_SIZE, \ + .statesize = sizeof(struct kmac_sha2_ctx), \ + .base = { \ + .cra_name = "phmac(sha" #x ")", \ + .cra_driver_name = "phmac_s390_sha" #x, \ + .cra_blocksize = SHA##x##_BLOCK_SIZE, \ + .cra_priority = 400, \ + .cra_flags = CRYPTO_ALG_ASYNC | \ + CRYPTO_ALG_NO_FALLBACK, \ + .cra_ctxsize = sizeof(struct phmac_tfm_ctx), \ + .cra_module = THIS_MODULE, \ + }, \ + }, \ + }, \ + .op = { \ + .do_one_request = phmac_do_one_request, \ + }, \ +} + +static struct phmac_alg { + unsigned int fc; + struct ahash_engine_alg alg; + bool registered; +} phmac_algs[] = { + { + .fc = CPACF_KMAC_PHMAC_SHA_224, + .alg = S390_ASYNC_PHMAC_ALG(224), + }, { + .fc = CPACF_KMAC_PHMAC_SHA_256, + .alg = S390_ASYNC_PHMAC_ALG(256), + }, { + .fc = CPACF_KMAC_PHMAC_SHA_384, + .alg = S390_ASYNC_PHMAC_ALG(384), + }, { + .fc = CPACF_KMAC_PHMAC_SHA_512, + .alg = S390_ASYNC_PHMAC_ALG(512), + } +}; + +static struct miscdevice phmac_dev = { + .name = "phmac", + .minor = MISC_DYNAMIC_MINOR, +}; + +static void s390_phmac_exit(void) +{ + struct phmac_alg *phmac; + int i; + + if (phmac_crypto_engine) { + crypto_engine_stop(phmac_crypto_engine); + crypto_engine_exit(phmac_crypto_engine); + } + + for (i = ARRAY_SIZE(phmac_algs) - 1; i >= 0; i--) { + phmac = &phmac_algs[i]; + if (phmac->registered) + crypto_engine_unregister_ahash(&phmac->alg); + } + + misc_deregister(&phmac_dev); +} + +static int __init s390_phmac_init(void) +{ + struct phmac_alg *phmac; + int i, rc; + + /* register a simple phmac pseudo misc device */ + rc = misc_register(&phmac_dev); + if (rc) + return rc; + + /* with this pseudo device alloc and start a crypto engine */ + phmac_crypto_engine = + crypto_engine_alloc_init_and_set(phmac_dev.this_device, + true, NULL, false, MAX_QLEN); + if (!phmac_crypto_engine) { + rc = -ENOMEM; + goto out_err; + } + rc = crypto_engine_start(phmac_crypto_engine); + if (rc) { + crypto_engine_exit(phmac_crypto_engine); + phmac_crypto_engine = NULL; + goto out_err; + } + + for (i = 0; i < ARRAY_SIZE(phmac_algs); i++) { + phmac = &phmac_algs[i]; + if (!cpacf_query_func(CPACF_KMAC, phmac->fc)) + continue; + rc = crypto_engine_register_ahash(&phmac->alg); + if (rc) + goto out_err; + phmac->registered = true; + pr_debug("%s registered\n", phmac->alg.base.halg.base.cra_name); + } + + return 0; + +out_err: + s390_phmac_exit(); + return rc; +} + +module_init(s390_phmac_init); +module_exit(s390_phmac_exit); + +MODULE_ALIAS_CRYPTO("phmac(sha224)"); +MODULE_ALIAS_CRYPTO("phmac(sha256)"); +MODULE_ALIAS_CRYPTO("phmac(sha384)"); +MODULE_ALIAS_CRYPTO("phmac(sha512)"); + +MODULE_DESCRIPTION("S390 HMAC driver for protected keys"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 9f8a3a5bed7e..04b4c43b6bae 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig 
@@ -188,6 +188,19 @@ config CRYPTO_PAES_S390
 	  Select this option if you want to use the paes cipher
 	  for example to use protected key encrypted devices.
 
+config CRYPTO_PHMAC_S390
+	tristate "PHMAC cipher algorithms"
+	depends on S390
+	depends on PKEY
+	select CRYPTO_HASH
+	select CRYPTO_ENGINE
+	help
+	  This is the s390 hardware accelerated implementation of the
+	  protected key HMAC support for SHA224, SHA256, SHA384 and SHA512.
+
+	  Select this option if you want to use the phmac digests
+	  for example to use dm-integrity with secure/protected keys.
+
 config S390_PRNG
 	tristate "Pseudo random number generator device driver"
 	depends on S390

From d0da164ba63dd6ff3696c565b118631a277822c3 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Tue, 17 Jun 2025 15:44:38 +0200
Subject: [PATCH 065/116] crypto: ahash - Add crypto_ahash_tested() helper
 function

Add a little inline helper function crypto_ahash_tested() to the
internal/hash.h header file to retrieve the tested status (that is,
the CRYPTO_ALG_TESTED bit in the cra_flags).

Signed-off-by: Harald Freudenberger
Suggested-by: Herbert Xu
Reviewed-by: Holger Dengler
Signed-off-by: Herbert Xu
---
 include/crypto/internal/hash.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 2456d6ea73f0..6ec5f2f37ccb 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -196,6 +196,13 @@ static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm,
 	tfm->reqsize = reqsize;
 }
 
+static inline bool crypto_ahash_tested(struct crypto_ahash *tfm)
+{
+	struct crypto_tfm *tfm_base = crypto_ahash_tfm(tfm);
+
+	return tfm_base->__crt_alg->cra_flags & CRYPTO_ALG_TESTED;
+}
+
 static inline void crypto_ahash_set_reqsize_dma(struct crypto_ahash *ahash,
 						unsigned int reqsize)
 {

From d48b2f5e82ea3888fb23659675c05f490899a293 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Tue, 17 Jun 2025 15:44:39 +0200
Subject: [PATCH 066/116] crypto: s390 - Add selftest support for phmac

Add key preparation code to the phmac setkey function for the case of
a running selftest: As long as crypto_ahash_tested() returns false,
all setkey() invocations are assumed to carry plain hmac clear key
values and thus need some preparation to work with the phmac
implementation. This makes it possible to use the already available
hmac test vectors implemented in the testmanager to test the phmac
code.

When crypto_ahash_tested() returns true (that is, after the larval
state) the phmac code assumes the key material is a blob digestible
by the pkey kernel module, which converts the blob into a working key
for the phmac code.

Signed-off-by: Harald Freudenberger
Reviewed-by: Holger Dengler
Signed-off-by: Herbert Xu
---
 arch/s390/crypto/phmac_s390.c | 137 ++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c
index 8473af47f5b9..90602f72108f 100644
--- a/arch/s390/crypto/phmac_s390.c
+++ b/arch/s390/crypto/phmac_s390.c
@@ -176,6 +176,114 @@ struct phmac_req_ctx {
 	bool final;
 };
 
+/*
+ * Pkey 'token' struct used to derive a protected key value from a clear key.
+ */ +struct hmac_clrkey_token { + u8 type; + u8 res0[3]; + u8 version; + u8 res1[3]; + u32 keytype; + u32 len; + u8 key[]; +} __packed; + +static int hash_key(const u8 *in, unsigned int inlen, + u8 *digest, unsigned int digestsize) +{ + unsigned long func; + union { + struct sha256_paramblock { + u32 h[8]; + u64 mbl; + } sha256; + struct sha512_paramblock { + u64 h[8]; + u128 mbl; + } sha512; + } __packed param; + +#define PARAM_INIT(x, y, z) \ + param.sha##x.h[0] = SHA##y ## _H0; \ + param.sha##x.h[1] = SHA##y ## _H1; \ + param.sha##x.h[2] = SHA##y ## _H2; \ + param.sha##x.h[3] = SHA##y ## _H3; \ + param.sha##x.h[4] = SHA##y ## _H4; \ + param.sha##x.h[5] = SHA##y ## _H5; \ + param.sha##x.h[6] = SHA##y ## _H6; \ + param.sha##x.h[7] = SHA##y ## _H7; \ + param.sha##x.mbl = (z) + + switch (digestsize) { + case SHA224_DIGEST_SIZE: + func = CPACF_KLMD_SHA_256; + PARAM_INIT(256, 224, inlen * 8); + break; + case SHA256_DIGEST_SIZE: + func = CPACF_KLMD_SHA_256; + PARAM_INIT(256, 256, inlen * 8); + break; + case SHA384_DIGEST_SIZE: + func = CPACF_KLMD_SHA_512; + PARAM_INIT(512, 384, inlen * 8); + break; + case SHA512_DIGEST_SIZE: + func = CPACF_KLMD_SHA_512; + PARAM_INIT(512, 512, inlen * 8); + break; + default: + return -EINVAL; + } + +#undef PARAM_INIT + + cpacf_klmd(func, ¶m, in, inlen); + + memcpy(digest, ¶m, digestsize); + + return 0; +} + +/* + * make_clrkey_token() - wrap the clear key into a pkey clearkey token. + */ +static inline int make_clrkey_token(const u8 *clrkey, size_t clrkeylen, + unsigned int digestsize, u8 *dest) +{ + struct hmac_clrkey_token *token = (struct hmac_clrkey_token *)dest; + unsigned int blocksize; + int rc; + + token->type = 0x00; + token->version = 0x02; + switch (digestsize) { + case SHA224_DIGEST_SIZE: + case SHA256_DIGEST_SIZE: + token->keytype = PKEY_KEYTYPE_HMAC_512; + blocksize = 64; + break; + case SHA384_DIGEST_SIZE: + case SHA512_DIGEST_SIZE: + token->keytype = PKEY_KEYTYPE_HMAC_1024; + blocksize = 128; + break; + default: + return -EINVAL; + } + token->len = blocksize; + + if (clrkeylen > blocksize) { + rc = hash_key(clrkey, clrkeylen, token->key, digestsize); + if (rc) + return rc; + } else { + memcpy(token->key, clrkey, clrkeylen); + } + + return 0; +} + /* * phmac_tfm_ctx_setkey() - Set key value into tfm context, maybe construct * a clear key token digestible by pkey from a clear key value. @@ -623,8 +731,30 @@ static int phmac_setkey(struct crypto_ahash *tfm, { struct phmac_tfm_ctx *tfm_ctx = crypto_ahash_ctx(tfm); unsigned int ds = crypto_ahash_digestsize(tfm); + unsigned int bs = crypto_ahash_blocksize(tfm); + unsigned int tmpkeylen; + u8 *tmpkey = NULL; int rc = 0; + if (!crypto_ahash_tested(tfm)) { + /* + * selftest running: key is a raw hmac clear key and needs + * to get embedded into a 'clear key token' in order to have + * it correctly processed by the pkey module. 
+		 */
+		tmpkeylen = sizeof(struct hmac_clrkey_token) + bs;
+		tmpkey = kzalloc(tmpkeylen, GFP_KERNEL);
+		if (!tmpkey) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		rc = make_clrkey_token(key, keylen, ds, tmpkey);
+		if (rc)
+			goto out;
+		keylen = tmpkeylen;
+		key = tmpkey;
+	}
+
 	/* copy raw key into tfm context */
 	rc = phmac_tfm_ctx_setkey(tfm_ctx, key, keylen);
 	if (rc)
@@ -667,6 +797,7 @@ static int phmac_setkey(struct crypto_ahash *tfm,
 	}
 
 out:
+	kfree(tmpkey);
 	pr_debug("rc=%d\n", rc);
 	return rc;
 }
@@ -861,6 +992,12 @@ static int __init s390_phmac_init(void)
 	struct phmac_alg *phmac;
 	int i, rc;
 
+	/* for selftest cpacf klmd subfunction is needed */
+	if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_256))
+		return -ENODEV;
+	if (!cpacf_query_func(CPACF_KLMD, CPACF_KLMD_SHA_512))
+		return -ENODEV;
+
 	/* register a simple phmac pseudo misc device */
 	rc = misc_register(&phmac_dev);
 	if (rc)

From a71d3e1beb7a9637eb75929b995f01d20981f013 Mon Sep 17 00:00:00 2001
From: Harald Freudenberger
Date: Tue, 17 Jun 2025 15:44:40 +0200
Subject: [PATCH 067/116] crypto: testmgr - Enable phmac selftest

Add phmac selftest invocation to the crypto testmanager.

Signed-off-by: Harald Freudenberger
Acked-by: Holger Dengler
Signed-off-by: Herbert Xu
---
 crypto/testmgr.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 196509c44d47..97190d9dcc0e 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -5310,6 +5310,36 @@ static const struct alg_test_desc alg_test_descs[] = {
 			.cipher = __VECS(fcrypt_pcbc_tv_template)
 		}
 	}, {
+#if IS_ENABLED(CONFIG_CRYPTO_PHMAC_S390)
+		.alg = "phmac(sha224)",
+		.test = alg_test_hash,
+		.fips_allowed = 1,
+		.suite = {
+			.hash = __VECS(hmac_sha224_tv_template)
+		}
+	}, {
+		.alg = "phmac(sha256)",
+		.test = alg_test_hash,
+		.fips_allowed = 1,
+		.suite = {
+			.hash = __VECS(hmac_sha256_tv_template)
+		}
+	}, {
+		.alg = "phmac(sha384)",
+		.test = alg_test_hash,
+		.fips_allowed = 1,
+		.suite = {
+			.hash = __VECS(hmac_sha384_tv_template)
+		}
+	}, {
+		.alg = "phmac(sha512)",
+		.test = alg_test_hash,
+		.fips_allowed = 1,
+		.suite = {
+			.hash = __VECS(hmac_sha512_tv_template)
+		}
+	}, {
+#endif
 		.alg = "pkcs1(rsa,none)",
 		.test = alg_test_sig,
 		.suite = {

From b4abeccb8d39db7d9b51cb0098d6458760b30a75 Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy
Date: Tue, 17 Jun 2025 19:43:54 +1000
Subject: [PATCH 068/116] crypto: ccp - Fix locking on alloc failure handling

The __snp_alloc_firmware_pages() helper allocates pages in the
firmware state (alloc + rmpupdate). In case of a failed rmpupdate, it
tries reclaiming pages whose state has already changed. This requires
calling the PSP firmware and, since there is sev_cmd_mutex to guard
such calls, the helper takes a "locked" parameter to specify whether
the lock is already held.

Most calls happen from snp_alloc_firmware_page(), which executes
without the lock. However, commit 24512afa4336 ("crypto: ccp: Handle
the legacy TMR allocation when SNP is enabled") switched
sev_fw_alloc() from alloc_pages() (which does not call the PSP) to
__snp_alloc_firmware_pages() (which does) but did not account for the
fact that sev_fw_alloc() is called from __sev_platform_init_locked()
(via __sev_platform_init_handle_tmr()) and executes with the lock
held.

Add a "locked" parameter to __snp_alloc_firmware_pages(). Make
sev_fw_alloc() use the new parameter to prevent a potential deadlock
in rmp_mark_pages_firmware() if rmpupdate() failed.
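The resulting calling convention, reduced to a minimal sketch
(simplified, not the driver source; the real reclaim sits in the
rmp_mark_pages_firmware() error path, and the command identifiers
below are the driver's existing ones):

	/* Hedged sketch: the "locked" flag tells the reclaim path whether
	 * the caller already holds sev_cmd_mutex, so it can pick the
	 * command variant that does not take the mutex again.
	 */
	static int reclaim_firmware_page(unsigned long paddr, bool locked)
	{
		struct sev_data_snp_page_reclaim data = { .paddr = paddr };
		int error;

		if (locked)	/* caller already holds sev_cmd_mutex */
			return __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM,
						   &data, &error);

		return sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &data, &error);
	}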
Fixes: 24512afa4336 ("crypto: ccp: Handle the legacy TMR allocation when SNP is enabled") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Tom Lendacky Reviewed-by: Pratik R. Sampat Signed-off-by: Herbert Xu --- drivers/crypto/ccp/sev-dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 17edc6bf5622..e058ba027792 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -434,7 +434,7 @@ static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, boo return rc; } -static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order) +static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order, bool locked) { unsigned long npages = 1ul << order, paddr; struct sev_device *sev; @@ -453,7 +453,7 @@ static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order) return page; paddr = __pa((unsigned long)page_address(page)); - if (rmp_mark_pages_firmware(paddr, npages, false)) + if (rmp_mark_pages_firmware(paddr, npages, locked)) return NULL; return page; @@ -463,7 +463,7 @@ void *snp_alloc_firmware_page(gfp_t gfp_mask) { struct page *page; - page = __snp_alloc_firmware_pages(gfp_mask, 0); + page = __snp_alloc_firmware_pages(gfp_mask, 0, false); return page ? page_address(page) : NULL; } @@ -498,7 +498,7 @@ static void *sev_fw_alloc(unsigned long len) { struct page *page; - page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len)); + page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len), true); if (!page) return NULL; From cb7fa6b6fc71e0c801e271aa498e2f19e6df2931 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Fri, 20 Jun 2025 09:29:26 +0200 Subject: [PATCH 069/116] crypto: inside-secure - Fix `dma_unmap_sg()` nents value The `dma_unmap_sg()` functions should be called with the same nents as the `dma_map_sg()`, not the value the map function returned. Fixes: c957f8b3e2e5 ("crypto: inside-secure - avoid unmapping DMA memory that was not mapped") Signed-off-by: Thomas Fourier Reviewed-by: Antoine Tenart Signed-off-by: Herbert Xu --- drivers/crypto/inside-secure/safexcel_hash.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index d2b632193beb..5baf4bd2fcee 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -249,7 +249,9 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, safexcel_complete(priv, ring); if (sreq->nents) { - dma_unmap_sg(priv->dev, areq->src, sreq->nents, DMA_TO_DEVICE); + dma_unmap_sg(priv->dev, areq->src, + sg_nents_for_len(areq->src, areq->nbytes), + DMA_TO_DEVICE); sreq->nents = 0; } @@ -497,7 +499,9 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, DMA_FROM_DEVICE); unmap_sg: if (req->nents) { - dma_unmap_sg(priv->dev, areq->src, req->nents, DMA_TO_DEVICE); + dma_unmap_sg(priv->dev, areq->src, + sg_nents_for_len(areq->src, areq->nbytes), + DMA_TO_DEVICE); req->nents = 0; } cdesc_rollback: From 735b72568c73875269a6b73ab9543a70f6ac8a9f Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Sat, 21 Jun 2025 13:36:43 +0200 Subject: [PATCH 070/116] crypto: jitter - fix intermediary handling The intermediary value was included in the wrong hash state. While there, adapt to user-space by setting the timestamp to 0 if stuck and inserting the values nevertheless. 
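Sketched as the intended conditioning flow (condensed from the diff
below, not a drop-in replacement):

	/* Stir the pool with the previous loop's intermediary value; it
	 * is credited with no entropy and goes into the main hash state.
	 */
	ret = crypto_shash_update(hash_state_desc, intermediary,
				  sizeof(intermediary));
	if (ret)
		goto err;

	/* Insert the time stamp unconditionally so that every
	 * conditioning operation processes an identical amount of input
	 * data; a stuck measurement contributes a zero value instead of
	 * being skipped.
	 */
	if (stuck)
		time = 0;
	ret = crypto_shash_update(hash_state_desc, (u8 *)&time, sizeof(__u64));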
Acked-by: Stephan Mueller Signed-off-by: Markus Theil Signed-off-by: Herbert Xu --- crypto/jitterentropy-kcapi.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crypto/jitterentropy-kcapi.c b/crypto/jitterentropy-kcapi.c index c24d4ff2b4a8..1266eb790708 100644 --- a/crypto/jitterentropy-kcapi.c +++ b/crypto/jitterentropy-kcapi.c @@ -144,7 +144,7 @@ int jent_hash_time(void *hash_state, __u64 time, u8 *addtl, * Inject the data from the previous loop into the pool. This data is * not considered to contain any entropy, but it stirs the pool a bit. */ - ret = crypto_shash_update(desc, intermediary, sizeof(intermediary)); + ret = crypto_shash_update(hash_state_desc, intermediary, sizeof(intermediary)); if (ret) goto err; @@ -157,11 +157,12 @@ int jent_hash_time(void *hash_state, __u64 time, u8 *addtl, * conditioning operation to have an identical amount of input data * according to section 3.1.5. */ - if (!stuck) { - ret = crypto_shash_update(hash_state_desc, (u8 *)&time, - sizeof(__u64)); + if (stuck) { + time = 0; } + ret = crypto_shash_update(hash_state_desc, (u8 *)&time, sizeof(__u64)); + err: shash_desc_zero(desc); memzero_explicit(intermediary, sizeof(intermediary)); From 181698af38d3f93381229ad89c09b5bd0496661a Mon Sep 17 00:00:00 2001 From: Mengbiao Xiong Date: Tue, 24 Jun 2025 14:54:18 +0800 Subject: [PATCH 071/116] crypto: ccp - Fix crash when rebind ccp device for ccp.ko When CONFIG_CRYPTO_DEV_CCP_DEBUGFS is enabled, rebinding the ccp device causes the following crash: $ echo '0000:0a:00.2' > /sys/bus/pci/drivers/ccp/unbind $ echo '0000:0a:00.2' > /sys/bus/pci/drivers/ccp/bind [ 204.976930] BUG: kernel NULL pointer dereference, address: 0000000000000098 [ 204.978026] #PF: supervisor write access in kernel mode [ 204.979126] #PF: error_code(0x0002) - not-present page [ 204.980226] PGD 0 P4D 0 [ 204.981317] Oops: Oops: 0002 [#1] SMP NOPTI ... [ 204.997852] Call Trace: [ 204.999074] [ 205.000297] start_creating+0x9f/0x1c0 [ 205.001533] debugfs_create_dir+0x1f/0x170 [ 205.002769] ? srso_return_thunk+0x5/0x5f [ 205.004000] ccp5_debugfs_setup+0x87/0x170 [ccp] [ 205.005241] ccp5_init+0x8b2/0x960 [ccp] [ 205.006469] ccp_dev_init+0xd4/0x150 [ccp] [ 205.007709] sp_init+0x5f/0x80 [ccp] [ 205.008942] sp_pci_probe+0x283/0x2e0 [ccp] [ 205.010165] ? srso_return_thunk+0x5/0x5f [ 205.011376] local_pci_probe+0x4f/0xb0 [ 205.012584] pci_device_probe+0xdb/0x230 [ 205.013810] really_probe+0xed/0x380 [ 205.015024] __driver_probe_device+0x7e/0x160 [ 205.016240] device_driver_attach+0x2f/0x60 [ 205.017457] bind_store+0x7c/0xb0 [ 205.018663] drv_attr_store+0x28/0x40 [ 205.019868] sysfs_kf_write+0x5f/0x70 [ 205.021065] kernfs_fop_write_iter+0x145/0x1d0 [ 205.022267] vfs_write+0x308/0x440 [ 205.023453] ksys_write+0x6d/0xe0 [ 205.024616] __x64_sys_write+0x1e/0x30 [ 205.025778] x64_sys_call+0x16ba/0x2150 [ 205.026942] do_syscall_64+0x56/0x1e0 [ 205.028108] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 205.029276] RIP: 0033:0x7fbc36f10104 [ 205.030420] Code: 89 02 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 8d 05 e1 08 2e 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 f3 c3 66 90 41 54 55 49 89 d4 53 48 89 f5 This patch sets ccp_debugfs_dir to NULL after destroying it in ccp5_debugfs_destroy, allowing the directory dentry to be recreated when rebinding the ccp device. Tested on AMD Ryzen 7 1700X. 
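The failure pattern, reduced to a sketch (assuming, for illustration
only, that ccp5_debugfs_setup() lazily creates the module-scoped
directory roughly like this; the exact setup code is not shown here):

	static struct dentry *ccp_debugfs_dir;	/* survives driver rebind */

	/* Sketch: after an unbind without the fix below, ccp_debugfs_dir
	 * is non-NULL but refers to a freed dentry, so passing it as a
	 * parent on the next bind crashes in start_creating().
	 */
	static void ccp5_debugfs_setup_sketch(struct ccp_device *ccp)
	{
		if (!ccp_debugfs_dir)
			ccp_debugfs_dir = debugfs_create_dir("ccp", NULL);
		debugfs_create_dir("per-device", ccp_debugfs_dir);
	}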
Fixes: 3cdbe346ed3f ("crypto: ccp - Add debugfs entries for CCP information") Signed-off-by: Mengbiao Xiong Reviewed-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c index a1055554b47a..dc26bc22c91d 100644 --- a/drivers/crypto/ccp/ccp-debugfs.c +++ b/drivers/crypto/ccp/ccp-debugfs.c @@ -319,5 +319,8 @@ void ccp5_debugfs_setup(struct ccp_device *ccp) void ccp5_debugfs_destroy(void) { + mutex_lock(&ccp_debugfs_lock); debugfs_remove_recursive(ccp_debugfs_dir); + ccp_debugfs_dir = NULL; + mutex_unlock(&ccp_debugfs_lock); } From 03ba056e63d3b2d3774c7a8cf40f5a31b968b612 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Mon, 30 Jun 2025 10:24:18 +0100 Subject: [PATCH 072/116] crypto: zstd - fix duplicate check warning Fix the following warnings reported by the static analyzer Smatch: crypto/zstd.c:273 zstd_decompress() warn: duplicate check 'scur' (previous on line 235) Fixes: f5ad93ffb541 ("crypto: zstd - convert to acomp") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/linux-crypto/92929e50-5650-40be-8c0a-de81e77f0acf@sabinyo.mountain/ Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- crypto/zstd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crypto/zstd.c b/crypto/zstd.c index 657e0cf7b952..24edb4d616b5 100644 --- a/crypto/zstd.c +++ b/crypto/zstd.c @@ -268,10 +268,9 @@ static int zstd_decompress(struct acomp_req *req) total_out += outbuf.pos; acomp_walk_done_dst(&walk, outbuf.pos); - } while (scur != inbuf.pos); + } while (inbuf.pos != scur); - if (scur) - acomp_walk_done_src(&walk, scur); + acomp_walk_done_src(&walk, scur); } while (ret == 0); out: From 25f4e1d7193d1438715bb0f05b6aa8df3f7b434f Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 3 Jul 2025 19:19:34 +0200 Subject: [PATCH 073/116] crypto: zstd - replace zero-length array with flexible array member Replace the deprecated zero-length array with a modern flexible array member in the struct zstd_ctx. No functional changes intended. Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Thorsten Blum Reviewed-by: Kees Cook Signed-off-by: Herbert Xu --- crypto/zstd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/zstd.c b/crypto/zstd.c index 24edb4d616b5..ebeadc1f3b5f 100644 --- a/crypto/zstd.c +++ b/crypto/zstd.c @@ -25,7 +25,7 @@ struct zstd_ctx { zstd_dctx *dctx; size_t wksp_size; zstd_parameters params; - u8 wksp[0] __aligned(8); + u8 wksp[] __aligned(8); }; static DEFINE_MUTEX(zstd_stream_lock); From 522a242a18adc5c63a24836715dbeec4dc3faee1 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Sun, 29 Jun 2025 20:31:41 +0300 Subject: [PATCH 074/116] hwrng: mtk - handle devm_pm_runtime_enable errors Although unlikely, devm_pm_runtime_enable() call might fail, so handle the return value. 
Fixes: 78cb66caa6ab ("hwrng: mtk - Use devm_pm_runtime_enable") Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu --- drivers/char/hw_random/mtk-rng.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index b7fa1bc1122b..d09a4d813766 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -143,7 +143,9 @@ static int mtk_rng_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, priv); pm_runtime_set_autosuspend_delay(&pdev->dev, RNG_AUTOSUSPEND_TIMEOUT); pm_runtime_use_autosuspend(&pdev->dev); - devm_pm_runtime_enable(&pdev->dev); + ret = devm_pm_runtime_enable(&pdev->dev); + if (ret) + return ret; dev_info(&pdev->dev, "registered RNG driver\n"); From 01951a7dc5ac1a37e5fb7d86ea7eb2dfbf96e8b6 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 30 Jun 2025 10:57:06 +0200 Subject: [PATCH 075/116] crypto: keembay - Fix dma_unmap_sg() nents value The dma_unmap_sg() functions should be called with the same nents as the dma_map_sg(), not the value the map function returned. Fixes: 472b04444cd3 ("crypto: keembay - Add Keem Bay OCS HCU driver") Signed-off-by: Thomas Fourier Signed-off-by: Herbert Xu --- drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c index 95dc8979918d..8f9e21ced0fe 100644 --- a/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c +++ b/drivers/crypto/intel/keembay/keembay-ocs-hcu-core.c @@ -68,6 +68,7 @@ struct ocs_hcu_ctx { * @sg_data_total: Total data in the SG list at any time. * @sg_data_offset: Offset into the data of the current individual SG node. * @sg_dma_nents: Number of sg entries mapped in dma_list. + * @nents: Number of entries in the scatterlist. */ struct ocs_hcu_rctx { struct ocs_hcu_dev *hcu_dev; @@ -91,6 +92,7 @@ struct ocs_hcu_rctx { unsigned int sg_data_total; unsigned int sg_data_offset; unsigned int sg_dma_nents; + unsigned int nents; }; /** @@ -199,7 +201,7 @@ static void kmb_ocs_hcu_dma_cleanup(struct ahash_request *req, /* Unmap req->src (if mapped). */ if (rctx->sg_dma_nents) { - dma_unmap_sg(dev, req->src, rctx->sg_dma_nents, DMA_TO_DEVICE); + dma_unmap_sg(dev, req->src, rctx->nents, DMA_TO_DEVICE); rctx->sg_dma_nents = 0; } @@ -260,6 +262,10 @@ static int kmb_ocs_dma_prepare(struct ahash_request *req) rc = -ENOMEM; goto cleanup; } + + /* Save the value of nents to pass to dma_unmap_sg. */ + rctx->nents = nents; + /* * The value returned by dma_map_sg() can be < nents; so update * nents accordingly. From 34b283636181ce02c52633551f594fec9876bec7 Mon Sep 17 00:00:00 2001 From: Thomas Fourier Date: Mon, 30 Jun 2025 11:16:22 +0200 Subject: [PATCH 076/116] crypto: img-hash - Fix dma_unmap_sg() nents value The dma_unmap_sg() functions should be called with the same nents as the dma_map_sg(), not the value the map function returned. 
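The API contract behind this whole class of fixes, as a short sketch
(a hypothetical helper, not taken from any of the drivers touched
here):

	/* dma_map_sg() may coalesce entries and return fewer mapped
	 * entries than it was given; dma_unmap_sg() must nevertheless be
	 * called with the original nents, never with the returned count.
	 */
	static int dma_xfer_sketch(struct device *dev, struct scatterlist *sg,
				   int nents)
	{
		int mapped = dma_map_sg(dev, sg, nents, DMA_TO_DEVICE);

		if (!mapped)
			return -ENOMEM;

		/* ... program the hardware using 'mapped' descriptors ... */

		dma_unmap_sg(dev, sg, nents, DMA_TO_DEVICE);	/* original nents */
		return 0;
	}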
Fixes: d358f1abbf71 ("crypto: img-hash - Add Imagination Technologies hw hash accelerator") Signed-off-by: Thomas Fourier Signed-off-by: Herbert Xu --- drivers/crypto/img-hash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index e050f5ff5efb..c527cd75b6fe 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -436,7 +436,7 @@ static int img_hash_write_via_dma_stop(struct img_hash_dev *hdev) struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); if (ctx->flags & DRIVER_FLAGS_SG) - dma_unmap_sg(hdev->dev, ctx->sg, ctx->dma_ct, DMA_TO_DEVICE); + dma_unmap_sg(hdev->dev, ctx->sg, 1, DMA_TO_DEVICE); return 0; } From d956692c7dd523b331d4556ee03def8dd02609dc Mon Sep 17 00:00:00 2001 From: Bairavi Alagappan Date: Mon, 30 Jun 2025 10:20:49 +0100 Subject: [PATCH 077/116] crypto: qat - disable ZUC-256 capability for QAT GEN5 The ZUC-256 EEA (encryption) and EIA (integrity) algorithms are not supported on QAT GEN5 devices, as their current implementation does not align with the NIST specification. Earlier versions of the ZUC-256 specification used a different initialization scheme, which has since been revised to comply with the 5G specification. Due to this misalignment with the updated specification, remove support for ZUC-256 EEA and EIA for QAT GEN5 by masking out the ZUC-256 capability. Fixes: fcf60f4bcf549 ("crypto: qat - add support for 420xx devices") Signed-off-by: Bairavi Alagappan Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 7c3c0f561c95..8340b5e8a947 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -191,7 +191,6 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) ICP_ACCEL_CAPABILITIES_SM4 | ICP_ACCEL_CAPABILITIES_AES_V2 | ICP_ACCEL_CAPABILITIES_ZUC | - ICP_ACCEL_CAPABILITIES_ZUC_256 | ICP_ACCEL_CAPABILITIES_WIRELESS_CRYPTO_EXT | ICP_ACCEL_CAPABILITIES_EXT_ALGCHAIN; @@ -223,17 +222,11 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) if (fusectl1 & ICP_ACCEL_GEN4_MASK_WCP_WAT_SLICE) { capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC; - capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC_256; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_WIRELESS_CRYPTO_EXT; } - if (fusectl1 & ICP_ACCEL_GEN4_MASK_EIA3_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_EIA3_SLICE) capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC; - capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC_256; - } - - if (fusectl1 & ICP_ACCEL_GEN4_MASK_ZUC_256_SLICE) - capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC_256; capabilities_asym = ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | ICP_ACCEL_CAPABILITIES_SM2 | From 590f8a67ba3cafba48e62d20ee03ab7d9a2c51f9 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 1 Jul 2025 08:09:36 +0200 Subject: [PATCH 078/116] crypto: cryptd - Use nested-BH locking for cryptd_cpu_queue cryptd_queue::cryptd_cpu_queue is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the struct cryptd_cpu_queue and use local_lock_nested_bh() for locking. 
This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Cc: "David S. Miller" Cc: Herbert Xu Cc: linux-crypto@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Herbert Xu --- crypto/cryptd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 5bb6f8d88cc2..efff54e707cb 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -34,6 +34,7 @@ MODULE_PARM_DESC(cryptd_max_cpu_qlen, "Set cryptd Max queue depth"); static struct workqueue_struct *cryptd_wq; struct cryptd_cpu_queue { + local_lock_t bh_lock; struct crypto_queue queue; struct work_struct work; }; @@ -110,6 +111,7 @@ static int cryptd_init_queue(struct cryptd_queue *queue, cpu_queue = per_cpu_ptr(queue->cpu_queue, cpu); crypto_init_queue(&cpu_queue->queue, max_cpu_qlen); INIT_WORK(&cpu_queue->work, cryptd_queue_worker); + local_lock_init(&cpu_queue->bh_lock); } pr_info("cryptd: max_cpu_qlen set to %d\n", max_cpu_qlen); return 0; @@ -135,6 +137,7 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, refcount_t *refcnt; local_bh_disable(); + local_lock_nested_bh(&queue->cpu_queue->bh_lock); cpu_queue = this_cpu_ptr(queue->cpu_queue); err = crypto_enqueue_request(&cpu_queue->queue, request); @@ -151,6 +154,7 @@ static int cryptd_enqueue_request(struct cryptd_queue *queue, refcount_inc(refcnt); out: + local_unlock_nested_bh(&queue->cpu_queue->bh_lock); local_bh_enable(); return err; @@ -169,8 +173,10 @@ static void cryptd_queue_worker(struct work_struct *work) * Only handle one request at a time to avoid hogging crypto workqueue. */ local_bh_disable(); + __local_lock_nested_bh(&cpu_queue->bh_lock); backlog = crypto_get_backlog(&cpu_queue->queue); req = crypto_dequeue_request(&cpu_queue->queue); + __local_unlock_nested_bh(&cpu_queue->bh_lock); local_bh_enable(); if (!req) From 7ea5ea3e58b037e0751c6406cea1e048892be4de Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Tue, 1 Jul 2025 10:47:26 +0100 Subject: [PATCH 079/116] crypto: qat - use pr_fmt() in adf_gen4_hw_data.c Add pr_fmt() to adf_gen4_hw_data.c logging and update the debug and error messages to utilize it accordingly. This does not introduce any functional changes. Signed-off-by: Suman Kumar Chakraborty Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 258adc0b49e0..e6a8954cbef1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2020 Intel Corporation */ + +#define pr_fmt(fmt) "QAT: " fmt + #include #include #include @@ -523,7 +526,7 @@ static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, u32 __expect_val = (expect_val); \ u32 actual_val = op(args); \ (__expect_val == actual_val) ? 0 : \ - (pr_err("QAT: Fail to restore %s register. Expected 0x%x, actual 0x%x\n", \ + (pr_err("Fail to restore %s register. 
Expected 0x%x, actual 0x%x\n", \ name, __expect_val, actual_val), -EINVAL); \ }) @@ -593,7 +596,7 @@ static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, */ val = state->ringexpstat; if (val) { - pr_info("QAT: Bank %u state not fully restored due to exception in saved state (%#x)\n", + pr_info("Bank %u state not fully restored due to exception in saved state (%#x)\n", bank, val); return 0; } @@ -601,8 +604,7 @@ static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, /* Ensure that the restoration process completed without exceptions */ tmp_val = ops->read_csr_exp_stat(base, bank); if (tmp_val) { - pr_err("QAT: Bank %u restored with exception: %#x\n", - bank, tmp_val); + pr_err("Bank %u restored with exception: %#x\n", bank, tmp_val); return -EFAULT; } From 18126fdff42f6854123b3ddaf3b469a0cee80adb Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Tue, 1 Jul 2025 10:47:27 +0100 Subject: [PATCH 080/116] crypto: qat - replace CHECK_STAT macro with static inline function The macro CHECK_STAT is used to check that all ring statuses match the saved state during restoring the state of bank. Replace the CHECK_STAT macro with the static inline function `check_stat()` to improve type safety, readability, and debuggability. This does not introduce any functional change. Signed-off-by: Suman Kumar Chakraborty Signed-off-by: Herbert Xu --- .../intel/qat/qat_common/adf_gen4_hw_data.c | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index e6a8954cbef1..b5eef5235b61 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -521,14 +521,19 @@ static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, } } -#define CHECK_STAT(op, expect_val, name, args...) \ -({ \ - u32 __expect_val = (expect_val); \ - u32 actual_val = op(args); \ - (__expect_val == actual_val) ? 0 : \ - (pr_err("Fail to restore %s register. Expected 0x%x, actual 0x%x\n", \ - name, __expect_val, actual_val), -EINVAL); \ -}) +static inline int check_stat(u32 (*op)(void __iomem *, u32), u32 expect_val, + const char *name, void __iomem *base, u32 bank) +{ + u32 actual_val = op(base, bank); + + if (expect_val == actual_val) + return 0; + + pr_err("Fail to restore %s register. Expected %#x, actual %#x\n", + name, expect_val, actual_val); + + return -EINVAL; +} static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, u32 bank, struct bank_state *state, u32 num_rings, @@ -611,32 +616,32 @@ static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben); /* Check that all ring statuses match the saved state. 
*/ - ret = CHECK_STAT(ops->read_csr_stat, state->ringstat0, "ringstat", + ret = check_stat(ops->read_csr_stat, state->ringstat0, "ringstat", base, bank); if (ret) return ret; - ret = CHECK_STAT(ops->read_csr_e_stat, state->ringestat, "ringestat", + ret = check_stat(ops->read_csr_e_stat, state->ringestat, "ringestat", base, bank); if (ret) return ret; - ret = CHECK_STAT(ops->read_csr_ne_stat, state->ringnestat, "ringnestat", + ret = check_stat(ops->read_csr_ne_stat, state->ringnestat, "ringnestat", base, bank); if (ret) return ret; - ret = CHECK_STAT(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat", + ret = check_stat(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat", base, bank); if (ret) return ret; - ret = CHECK_STAT(ops->read_csr_f_stat, state->ringfstat, "ringfstat", + ret = check_stat(ops->read_csr_f_stat, state->ringfstat, "ringfstat", base, bank); if (ret) return ret; - ret = CHECK_STAT(ops->read_csr_c_stat, state->ringcstat0, "ringcstat", + ret = check_stat(ops->read_csr_c_stat, state->ringcstat0, "ringcstat", base, bank); if (ret) return ret; From e9eec2916c167c7867148254a8f945401a5f6a38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Mielnik?= Date: Tue, 1 Jul 2025 10:47:28 +0100 Subject: [PATCH 081/116] crypto: qat - relocate bank state helper functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing implementation of bank state management functions, including saving and restoring state, is located within 4xxx device files. However, these functions do not contain GEN4-specific code and are applicable to other QAT generations. Relocate the bank state management functions to a new file, adf_bank_state.c, and rename them removing the `gen4` prefix. This change enables the reuse of such functions across different QAT generations. Add documentation to bank state related functions that were moved from QAT 4xxx specific files to common files. This does not introduce any functional change. 
Signed-off-by: MaÅ‚gorzata Mielnik Signed-off-by: Herbert Xu --- .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 5 +- drivers/crypto/intel/qat/qat_common/Makefile | 1 + .../intel/qat/qat_common/adf_bank_state.c | 238 ++++++++++++++++++ .../intel/qat/qat_common/adf_bank_state.h | 16 ++ .../intel/qat/qat_common/adf_gen4_hw_data.c | 200 --------------- .../intel/qat/qat_common/adf_gen4_hw_data.h | 7 - 6 files changed, 258 insertions(+), 209 deletions(-) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_bank_state.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_bank_state.h diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index bd0b1b1015c0..4d4889533558 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -448,8 +449,8 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; - hw_data->bank_state_save = adf_gen4_bank_state_save; - hw_data->bank_state_restore = adf_gen4_bank_state_restore; + hw_data->bank_state_save = adf_bank_state_save; + hw_data->bank_state_restore = adf_bank_state_restore; hw_data->enable_pm = adf_gen4_enable_pm; hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; hw_data->dev_config = adf_gen4_dev_config; diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 66bb295ace28..e426cc3c49c3 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -4,6 +4,7 @@ ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE='"CRYPTO_QAT"' intel_qat-y := adf_accel_engine.o \ adf_admin.o \ adf_aer.o \ + adf_bank_state.o \ adf_cfg.o \ adf_cfg_services.o \ adf_clock.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_bank_state.c b/drivers/crypto/intel/qat/qat_common/adf_bank_state.c new file mode 100644 index 000000000000..2a0bbee8a288 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_bank_state.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 Intel Corporation */ + +#define pr_fmt(fmt) "QAT: " fmt + +#include +#include +#include +#include "adf_accel_devices.h" +#include "adf_bank_state.h" +#include "adf_common_drv.h" + +/* Ring interrupt masks */ +#define ADF_RP_INT_SRC_SEL_F_RISE_MASK GENMASK(1, 0) +#define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0) +#define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4 + +static inline int check_stat(u32 (*op)(void __iomem *, u32), u32 expect_val, + const char *name, void __iomem *base, u32 bank) +{ + u32 actual_val = op(base, bank); + + if (expect_val == actual_val) + return 0; + + pr_err("Fail to restore %s register. 
Expected %#x, actual %#x\n", + name, expect_val, actual_val); + + return -EINVAL; +} + +static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, + u32 bank, struct bank_state *state, u32 num_rings) +{ + u32 i; + + state->ringstat0 = ops->read_csr_stat(base, bank); + state->ringuostat = ops->read_csr_uo_stat(base, bank); + state->ringestat = ops->read_csr_e_stat(base, bank); + state->ringnestat = ops->read_csr_ne_stat(base, bank); + state->ringnfstat = ops->read_csr_nf_stat(base, bank); + state->ringfstat = ops->read_csr_f_stat(base, bank); + state->ringcstat0 = ops->read_csr_c_stat(base, bank); + state->iaintflagen = ops->read_csr_int_en(base, bank); + state->iaintflagreg = ops->read_csr_int_flag(base, bank); + state->iaintflagsrcsel0 = ops->read_csr_int_srcsel(base, bank); + state->iaintcolen = ops->read_csr_int_col_en(base, bank); + state->iaintcolctl = ops->read_csr_int_col_ctl(base, bank); + state->iaintflagandcolen = ops->read_csr_int_flag_and_col(base, bank); + state->ringexpstat = ops->read_csr_exp_stat(base, bank); + state->ringexpintenable = ops->read_csr_exp_int_en(base, bank); + state->ringsrvarben = ops->read_csr_ring_srv_arb_en(base, bank); + + for (i = 0; i < num_rings; i++) { + state->rings[i].head = ops->read_csr_ring_head(base, bank, i); + state->rings[i].tail = ops->read_csr_ring_tail(base, bank, i); + state->rings[i].config = ops->read_csr_ring_config(base, bank, i); + state->rings[i].base = ops->read_csr_ring_base(base, bank, i); + } +} + +static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, + u32 bank, struct bank_state *state, u32 num_rings, + int tx_rx_gap) +{ + u32 val, tmp_val, i; + int ret; + + for (i = 0; i < num_rings; i++) + ops->write_csr_ring_base(base, bank, i, state->rings[i].base); + + for (i = 0; i < num_rings; i++) + ops->write_csr_ring_config(base, bank, i, state->rings[i].config); + + for (i = 0; i < num_rings / 2; i++) { + int tx = i * (tx_rx_gap + 1); + int rx = tx + tx_rx_gap; + + ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); + ops->write_csr_ring_tail(base, bank, tx, state->rings[tx].tail); + + /* + * The TX ring head needs to be updated again to make sure that + * the HW will not consider the ring as full when it is empty + * and the correct state flags are set to match the recovered state. + */ + if (state->ringestat & BIT(tx)) { + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK; + ops->write_csr_int_srcsel_w_val(base, bank, val); + ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); + } + + ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; + ops->write_csr_int_srcsel_w_val(base, bank, val); + + ops->write_csr_ring_head(base, bank, rx, state->rings[rx].head); + val = ops->read_csr_int_srcsel(base, bank); + val |= ADF_RP_INT_SRC_SEL_F_FALL_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; + ops->write_csr_int_srcsel_w_val(base, bank, val); + + /* + * The RX ring tail needs to be updated again to make sure that + * the HW will not consider the ring as empty when it is full + * and the correct state flags are set to match the recovered state. 
+ */ + if (state->ringfstat & BIT(rx)) + ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); + } + + ops->write_csr_int_flag_and_col(base, bank, state->iaintflagandcolen); + ops->write_csr_int_en(base, bank, state->iaintflagen); + ops->write_csr_int_col_en(base, bank, state->iaintcolen); + ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0); + ops->write_csr_exp_int_en(base, bank, state->ringexpintenable); + ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl); + + /* + * Verify whether any exceptions were raised during the bank save process. + * If exceptions occurred, the status and exception registers cannot + * be directly restored. Consequently, further restoration is not + * feasible, and the current state of the ring should be maintained. + */ + val = state->ringexpstat; + if (val) { + pr_info("Bank %u state not fully restored due to exception in saved state (%#x)\n", + bank, val); + return 0; + } + + /* Ensure that the restoration process completed without exceptions */ + tmp_val = ops->read_csr_exp_stat(base, bank); + if (tmp_val) { + pr_err("Bank %u restored with exception: %#x\n", bank, tmp_val); + return -EFAULT; + } + + ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben); + + /* Check that all ring statuses match the saved state. */ + ret = check_stat(ops->read_csr_stat, state->ringstat0, "ringstat", + base, bank); + if (ret) + return ret; + + ret = check_stat(ops->read_csr_e_stat, state->ringestat, "ringestat", + base, bank); + if (ret) + return ret; + + ret = check_stat(ops->read_csr_ne_stat, state->ringnestat, "ringnestat", + base, bank); + if (ret) + return ret; + + ret = check_stat(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat", + base, bank); + if (ret) + return ret; + + ret = check_stat(ops->read_csr_f_stat, state->ringfstat, "ringfstat", + base, bank); + if (ret) + return ret; + + ret = check_stat(ops->read_csr_c_stat, state->ringcstat0, "ringcstat", + base, bank); + if (ret) + return ret; + + return 0; +} + +/** + * adf_bank_state_save() - save state of bank-related registers + * @accel_dev: Pointer to the device structure + * @bank_number: Bank number + * @state: Pointer to bank state structure + * + * This function saves the state of a bank by reading the bank CSRs and + * writing them in the @state structure. + * + * Returns 0 on success, error code otherwise + */ +int adf_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_base = adf_get_etr_base(accel_dev); + + if (bank_number >= hw_data->num_banks || !state) + return -EINVAL; + + dev_dbg(&GET_DEV(accel_dev), "Saving state of bank %d\n", bank_number); + + bank_state_save(csr_ops, csr_base, bank_number, state, + hw_data->num_rings_per_bank); + + return 0; +} +EXPORT_SYMBOL_GPL(adf_bank_state_save); + +/** + * adf_bank_state_restore() - restore state of bank-related registers + * @accel_dev: Pointer to the device structure + * @bank_number: Bank number + * @state: Pointer to bank state structure + * + * This function attempts to restore the state of a bank by writing the + * bank CSRs to the values in the state structure. 
+ * + * Returns 0 on success, error code otherwise + */ +int adf_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); + void __iomem *csr_base = adf_get_etr_base(accel_dev); + int ret; + + if (bank_number >= hw_data->num_banks || !state) + return -EINVAL; + + dev_dbg(&GET_DEV(accel_dev), "Restoring state of bank %d\n", bank_number); + + ret = bank_state_restore(csr_ops, csr_base, bank_number, state, + hw_data->num_rings_per_bank, hw_data->tx_rx_gap); + if (ret) + dev_err(&GET_DEV(accel_dev), + "Unable to restore state of bank %d\n", bank_number); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_bank_state_restore); diff --git a/drivers/crypto/intel/qat/qat_common/adf_bank_state.h b/drivers/crypto/intel/qat/qat_common/adf_bank_state.h new file mode 100644 index 000000000000..85b15ed161f4 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_bank_state.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2025 Intel Corporation */ +#ifndef ADF_BANK_STATE_H_ +#define ADF_BANK_STATE_H_ + +#include + +struct adf_accel_dev; +struct bank_state; + +int adf_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); +int adf_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index b5eef5235b61..0dbf9cc2a858 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -491,206 +491,6 @@ int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, return ret; } -static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, - u32 bank, struct bank_state *state, u32 num_rings) -{ - u32 i; - - state->ringstat0 = ops->read_csr_stat(base, bank); - state->ringuostat = ops->read_csr_uo_stat(base, bank); - state->ringestat = ops->read_csr_e_stat(base, bank); - state->ringnestat = ops->read_csr_ne_stat(base, bank); - state->ringnfstat = ops->read_csr_nf_stat(base, bank); - state->ringfstat = ops->read_csr_f_stat(base, bank); - state->ringcstat0 = ops->read_csr_c_stat(base, bank); - state->iaintflagen = ops->read_csr_int_en(base, bank); - state->iaintflagreg = ops->read_csr_int_flag(base, bank); - state->iaintflagsrcsel0 = ops->read_csr_int_srcsel(base, bank); - state->iaintcolen = ops->read_csr_int_col_en(base, bank); - state->iaintcolctl = ops->read_csr_int_col_ctl(base, bank); - state->iaintflagandcolen = ops->read_csr_int_flag_and_col(base, bank); - state->ringexpstat = ops->read_csr_exp_stat(base, bank); - state->ringexpintenable = ops->read_csr_exp_int_en(base, bank); - state->ringsrvarben = ops->read_csr_ring_srv_arb_en(base, bank); - - for (i = 0; i < num_rings; i++) { - state->rings[i].head = ops->read_csr_ring_head(base, bank, i); - state->rings[i].tail = ops->read_csr_ring_tail(base, bank, i); - state->rings[i].config = ops->read_csr_ring_config(base, bank, i); - state->rings[i].base = ops->read_csr_ring_base(base, bank, i); - } -} - -static inline int check_stat(u32 (*op)(void __iomem *, u32), u32 expect_val, - const char *name, void __iomem *base, u32 bank) -{ - u32 actual_val = op(base, bank); - - if (expect_val == actual_val) - return 0; - - pr_err("Fail to restore %s register. 
Expected %#x, actual %#x\n", - name, expect_val, actual_val); - - return -EINVAL; -} - -static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, - u32 bank, struct bank_state *state, u32 num_rings, - int tx_rx_gap) -{ - u32 val, tmp_val, i; - int ret; - - for (i = 0; i < num_rings; i++) - ops->write_csr_ring_base(base, bank, i, state->rings[i].base); - - for (i = 0; i < num_rings; i++) - ops->write_csr_ring_config(base, bank, i, state->rings[i].config); - - for (i = 0; i < num_rings / 2; i++) { - int tx = i * (tx_rx_gap + 1); - int rx = tx + tx_rx_gap; - - ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); - ops->write_csr_ring_tail(base, bank, tx, state->rings[tx].tail); - - /* - * The TX ring head needs to be updated again to make sure that - * the HW will not consider the ring as full when it is empty - * and the correct state flags are set to match the recovered state. - */ - if (state->ringestat & BIT(tx)) { - val = ops->read_csr_int_srcsel(base, bank); - val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK; - ops->write_csr_int_srcsel_w_val(base, bank, val); - ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head); - } - - ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); - val = ops->read_csr_int_srcsel(base, bank); - val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; - ops->write_csr_int_srcsel_w_val(base, bank, val); - - ops->write_csr_ring_head(base, bank, rx, state->rings[rx].head); - val = ops->read_csr_int_srcsel(base, bank); - val |= ADF_RP_INT_SRC_SEL_F_FALL_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH; - ops->write_csr_int_srcsel_w_val(base, bank, val); - - /* - * The RX ring tail needs to be updated again to make sure that - * the HW will not consider the ring as empty when it is full - * and the correct state flags are set to match the recovered state. - */ - if (state->ringfstat & BIT(rx)) - ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail); - } - - ops->write_csr_int_flag_and_col(base, bank, state->iaintflagandcolen); - ops->write_csr_int_en(base, bank, state->iaintflagen); - ops->write_csr_int_col_en(base, bank, state->iaintcolen); - ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0); - ops->write_csr_exp_int_en(base, bank, state->ringexpintenable); - ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl); - - /* - * Verify whether any exceptions were raised during the bank save process. - * If exceptions occurred, the status and exception registers cannot - * be directly restored. Consequently, further restoration is not - * feasible, and the current state of the ring should be maintained. - */ - val = state->ringexpstat; - if (val) { - pr_info("Bank %u state not fully restored due to exception in saved state (%#x)\n", - bank, val); - return 0; - } - - /* Ensure that the restoration process completed without exceptions */ - tmp_val = ops->read_csr_exp_stat(base, bank); - if (tmp_val) { - pr_err("Bank %u restored with exception: %#x\n", bank, tmp_val); - return -EFAULT; - } - - ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben); - - /* Check that all ring statuses match the saved state. 
*/ - ret = check_stat(ops->read_csr_stat, state->ringstat0, "ringstat", - base, bank); - if (ret) - return ret; - - ret = check_stat(ops->read_csr_e_stat, state->ringestat, "ringestat", - base, bank); - if (ret) - return ret; - - ret = check_stat(ops->read_csr_ne_stat, state->ringnestat, "ringnestat", - base, bank); - if (ret) - return ret; - - ret = check_stat(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat", - base, bank); - if (ret) - return ret; - - ret = check_stat(ops->read_csr_f_stat, state->ringfstat, "ringfstat", - base, bank); - if (ret) - return ret; - - ret = check_stat(ops->read_csr_c_stat, state->ringcstat0, "ringcstat", - base, bank); - if (ret) - return ret; - - return 0; -} - -int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state) -{ - struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); - struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); - void __iomem *csr_base = adf_get_etr_base(accel_dev); - - if (bank_number >= hw_data->num_banks || !state) - return -EINVAL; - - dev_dbg(&GET_DEV(accel_dev), "Saving state of bank %d\n", bank_number); - - bank_state_save(csr_ops, csr_base, bank_number, state, - hw_data->num_rings_per_bank); - - return 0; -} -EXPORT_SYMBOL_GPL(adf_gen4_bank_state_save); - -int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state) -{ - struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); - struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); - void __iomem *csr_base = adf_get_etr_base(accel_dev); - int ret; - - if (bank_number >= hw_data->num_banks || !state) - return -EINVAL; - - dev_dbg(&GET_DEV(accel_dev), "Restoring state of bank %d\n", bank_number); - - ret = bank_state_restore(csr_ops, csr_base, bank_number, state, - hw_data->num_rings_per_bank, hw_data->tx_rx_gap); - if (ret) - dev_err(&GET_DEV(accel_dev), - "Unable to restore state of bank %d\n", bank_number); - - return ret; -} -EXPORT_SYMBOL_GPL(adf_gen4_bank_state_restore); - static int adf_gen4_build_comp_block(void *ctx, enum adf_dc_algo algo) { struct icp_qat_fw_comp_req *req_tmpl = ctx; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index e4f4d5fa616d..7f2b9cb0fe60 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -84,9 +84,6 @@ #define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4) /* Ring interrupt */ -#define ADF_RP_INT_SRC_SEL_F_RISE_MASK GENMASK(1, 0) -#define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0) -#define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4 #define ADF_COALESCED_POLL_TIMEOUT_US (1 * USEC_PER_SEC) #define ADF_COALESCED_POLL_DELAY_US 1000 #define ADF_WQM_CSR_RPINTSOU(bank) (0x200000 + ((bank) << 12)) @@ -176,10 +173,6 @@ int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, u32 bank_number, int timeout_us); void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, u32 bank_number); -int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state); -int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, - u32 bank_number, struct bank_state *state); bool adf_gen4_services_supported(unsigned long service_mask); void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops); From a47dc5d1a6e2342e7725c6c592800d49b7276b7a Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Tue, 1 Jul 2025 10:47:29 +0100 Subject: 
[PATCH 082/116] crypto: qat - relocate and rename bank state structure definition The `bank_state` structure represents the state of a bank of rings. As part of recent refactoring, the functions that interact with this structure have been moved to a new unit, adf_bank_state.c. To align with this reorganization, rename `struct bank_state` to `struct adf_bank_state` and move its definition to adf_bank_state.h. Also relocate the associated `struct ring_config` to the same header to consolidate related definitions. Update all references to use the new structure name. This does not introduce any functional change. Signed-off-by: Suman Kumar Chakraborty Signed-off-by: Herbert Xu --- .../intel/qat/qat_common/adf_accel_devices.h | 38 ++---------------- .../intel/qat/qat_common/adf_bank_state.c | 8 ++-- .../intel/qat/qat_common/adf_bank_state.h | 39 +++++++++++++++++-- .../intel/qat/qat_common/adf_gen4_vf_mig.c | 7 ++-- 4 files changed, 47 insertions(+), 45 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index 2ee526063213..f76e0f6c66ae 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -157,39 +157,7 @@ struct admin_info { u32 mailbox_offset; }; -struct ring_config { - u64 base; - u32 config; - u32 head; - u32 tail; - u32 reserved0; -}; - -struct bank_state { - u32 ringstat0; - u32 ringstat1; - u32 ringuostat; - u32 ringestat; - u32 ringnestat; - u32 ringnfstat; - u32 ringfstat; - u32 ringcstat0; - u32 ringcstat1; - u32 ringcstat2; - u32 ringcstat3; - u32 iaintflagen; - u32 iaintflagreg; - u32 iaintflagsrcsel0; - u32 iaintflagsrcsel1; - u32 iaintcolen; - u32 iaintcolctl; - u32 iaintflagandcolen; - u32 ringexpstat; - u32 ringexpintenable; - u32 ringsrvarben; - u32 reserved0; - struct ring_config rings[ADF_ETR_MAX_RINGS_PER_BANK]; -}; +struct adf_bank_state; struct adf_hw_csr_ops { u64 (*build_csr_ring_base_addr)(dma_addr_t addr, u32 size); @@ -338,9 +306,9 @@ struct adf_hw_device_data { void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev); int (*ring_pair_reset)(struct adf_accel_dev *accel_dev, u32 bank_nr); int (*bank_state_save)(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state); + struct adf_bank_state *state); int (*bank_state_restore)(struct adf_accel_dev *accel_dev, - u32 bank_number, struct bank_state *state); + u32 bank_number, struct adf_bank_state *state); void (*reset_device)(struct adf_accel_dev *accel_dev); void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num); diff --git a/drivers/crypto/intel/qat/qat_common/adf_bank_state.c b/drivers/crypto/intel/qat/qat_common/adf_bank_state.c index 2a0bbee8a288..225d55d56a4b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_bank_state.c +++ b/drivers/crypto/intel/qat/qat_common/adf_bank_state.c @@ -30,7 +30,7 @@ static inline int check_stat(u32 (*op)(void __iomem *, u32), u32 expect_val, } static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, - u32 bank, struct bank_state *state, u32 num_rings) + u32 bank, struct adf_bank_state *state, u32 num_rings) { u32 i; @@ -60,7 +60,7 @@ static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base, } static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, - u32 bank, struct bank_state *state, u32 num_rings, + u32 bank, struct adf_bank_state *state, u32 num_rings, int 
tx_rx_gap) { u32 val, tmp_val, i; @@ -185,7 +185,7 @@ static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base, * Returns 0 on success, error code otherwise */ int adf_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state) + struct adf_bank_state *state) { struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); @@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(adf_bank_state_save); * Returns 0 on success, error code otherwise */ int adf_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state) + struct adf_bank_state *state) { struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_bank_state.h b/drivers/crypto/intel/qat/qat_common/adf_bank_state.h index 85b15ed161f4..48b573d692dd 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_bank_state.h +++ b/drivers/crypto/intel/qat/qat_common/adf_bank_state.h @@ -6,11 +6,44 @@ #include struct adf_accel_dev; -struct bank_state; + +struct ring_config { + u64 base; + u32 config; + u32 head; + u32 tail; + u32 reserved0; +}; + +struct adf_bank_state { + u32 ringstat0; + u32 ringstat1; + u32 ringuostat; + u32 ringestat; + u32 ringnestat; + u32 ringnfstat; + u32 ringfstat; + u32 ringcstat0; + u32 ringcstat1; + u32 ringcstat2; + u32 ringcstat3; + u32 iaintflagen; + u32 iaintflagreg; + u32 iaintflagsrcsel0; + u32 iaintflagsrcsel1; + u32 iaintcolen; + u32 iaintcolctl; + u32 iaintflagandcolen; + u32 ringexpstat; + u32 ringexpintenable; + u32 ringsrvarben; + u32 reserved0; + struct ring_config rings[ADF_ETR_MAX_RINGS_PER_BANK]; +}; int adf_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state); + struct adf_bank_state *state); int adf_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, - struct bank_state *state); + struct adf_bank_state *state); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c index a62eb5e8dbe6..adb21656a3ba 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c @@ -9,6 +9,7 @@ #include #include "adf_accel_devices.h" +#include "adf_bank_state.h" #include "adf_common_drv.h" #include "adf_gen4_hw_data.h" #include "adf_gen4_pfvf.h" @@ -358,7 +359,7 @@ static int adf_gen4_vfmig_load_etr_regs(struct adf_mstate_mgr *sub_mgr, pf_bank_nr = vf_bank_info->bank_nr + vf_bank_info->vf_nr * hw_data->num_banks_per_vf; ret = hw_data->bank_state_restore(accel_dev, pf_bank_nr, - (struct bank_state *)state); + (struct adf_bank_state *)state); if (ret) { dev_err(&GET_DEV(accel_dev), "Failed to load regs for vf%d bank%d\n", @@ -585,7 +586,7 @@ static int adf_gen4_vfmig_save_etr_regs(struct adf_mstate_mgr *subs, u8 *state, pf_bank_nr += vf_bank_info->vf_nr * hw_data->num_banks_per_vf; ret = hw_data->bank_state_save(accel_dev, pf_bank_nr, - (struct bank_state *)state); + (struct adf_bank_state *)state); if (ret) { dev_err(&GET_DEV(accel_dev), "Failed to save regs for vf%d bank%d\n", @@ -593,7 +594,7 @@ static int adf_gen4_vfmig_save_etr_regs(struct adf_mstate_mgr *subs, u8 *state, return ret; } - return sizeof(struct bank_state); + return sizeof(struct adf_bank_state); } static int adf_gen4_vfmig_save_etr_bank(struct adf_accel_dev *accel_dev, From 26abce2510980f8c4693a6a35753a94f0efd47ab Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Ma=C5=82gorzata=20Mielnik?= Date: Tue, 1 Jul 2025 10:47:30 +0100 Subject: [PATCH 083/116] crypto: qat - add live migration enablers for GEN6 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation of the QAT live migration enablers is exclusive to QAT GEN4 devices and resides within QAT GEN4 specific files. However, the underlying mechanisms, such as the relevant CSRs and offsets, can be shared between QAT GEN4 and QAT GEN6 devices. Add the necessary enablers required to implement live migration for QAT GEN6 devices to the abstraction layer to allow leveraging the existing QAT GEN4 implementation. Signed-off-by: Małgorzata Mielnik Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 4 ++++ drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c | 7 +++++++ drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h | 2 ++ 3 files changed, 13 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index 435d2ff38ab3..4d93d5a56ba3 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -842,6 +843,8 @@ void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data) hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = ring_pair_reset; hw_data->dev_config = dev_config; + hw_data->bank_state_save = adf_bank_state_save; + hw_data->bank_state_restore = adf_bank_state_restore; hw_data->get_hb_clock = get_heartbeat_clock; hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE; hw_data->start_timer = adf_timer_start; @@ -853,6 +856,7 @@ void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data) adf_gen6_init_hw_csr_ops(&hw_data->csr_ops); adf_gen6_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen6_init_dc_ops(&hw_data->dc_ops); + adf_gen6_init_vf_mig_ops(&hw_data->vfmig_ops); adf_gen6_init_ras_ops(&hw_data->ras_ops); } diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c index 58a072e2f936..c9b151006dca 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.c @@ -5,6 +5,7 @@ #include "adf_gen4_config.h" #include "adf_gen4_hw_csr_data.h" #include "adf_gen4_pfvf.h" +#include "adf_gen4_vf_mig.h" #include "adf_gen6_shared.h" struct adf_accel_dev; @@ -47,3 +48,9 @@ int adf_gen6_no_dev_config(struct adf_accel_dev *accel_dev) return adf_no_dev_config(accel_dev); } EXPORT_SYMBOL_GPL(adf_gen6_no_dev_config); + +void adf_gen6_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops) +{ + adf_gen4_init_vf_mig_ops(vfmig_ops); +} +EXPORT_SYMBOL_GPL(adf_gen6_init_vf_mig_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h index bc8e71e984fc..fc6fad029a70 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_shared.h @@ -4,6 +4,7 @@ #define ADF_GEN6_SHARED_H_ struct adf_hw_csr_ops; +struct qat_migdev_ops; struct adf_accel_dev; struct adf_pfvf_ops; @@ -12,4 +13,5 @@ void adf_gen6_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); int adf_gen6_cfg_dev_init(struct adf_accel_dev *accel_dev); int adf_gen6_comp_dev_config(struct adf_accel_dev *accel_dev); int
adf_gen6_no_dev_config(struct adf_accel_dev *accel_dev); +void adf_gen6_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops); #endif/* ADF_GEN6_SHARED_H_ */ From 75e2d4b1eddcfafd3b3961490f7da0bae4ecb451 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Tue, 1 Jul 2025 13:20:45 +0200 Subject: [PATCH 084/116] crypto: caam - avoid option aliasing with the CONFIG_CAAM_QI build option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the Makefile, the new build option CONFIG_CAAM_QI is defined conditioned on the existence of CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI, which is properly defined in the Kconfig file. So, CONFIG_CAAM_QI is just a local alias for CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI. There is little benefit in the source code to having this slightly shorter alias for this configuration, but it complicates further maintenance, as searching for the impact of CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI requires grepping once, identifying the introduced option, and then continuing the search for that option. Further, tools such as cross-referencers and scripts that check Kconfig definitions and their use simply do not handle this situation. Given that this is the only instance of such a config alias in the whole kernel tree, simply avoid this pattern of aliasing here. Use CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI throughout the Freescale CAAM-Multicore platform driver backend source code. No functional change. Signed-off-by: Lukas Bulwahn Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Makefile | 4 ---- drivers/crypto/caam/ctrl.c | 6 +++--- drivers/crypto/caam/debugfs.c | 2 +- drivers/crypto/caam/debugfs.h | 2 +- drivers/crypto/caam/intern.h | 4 ++-- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index acf1b197eb84..d2eaf5205b1c 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -25,10 +25,6 @@ caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caampkc.o pkc_desc.o caam_jr-$(CONFIG_CRYPTO_DEV_FSL_CAAM_BLOB_GEN) += blob_gen.o caam-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) += qi.o -ifneq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI),) - ccflags-y += -DCONFIG_CAAM_QI -endif - caam-$(CONFIG_DEBUG_FS) += debugfs.o obj-$(CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM) += dpaa2_caam.o diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index ce7b99019537..a93be395c878 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -24,7 +24,7 @@ bool caam_dpaa2; EXPORT_SYMBOL(caam_dpaa2); -#ifdef CONFIG_CAAM_QI +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI #include "qi.h" #endif @@ -968,7 +968,7 @@ static int caam_probe(struct platform_device *pdev) caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2); ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK); -#ifdef CONFIG_CAAM_QI +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI /* If (DPAA 1.x) QI present, check whether dependencies are available */ if (ctrlpriv->qi_present && !caam_dpaa2) { ret = qman_is_probed(); @@ -1099,7 +1099,7 @@ static int caam_probe(struct platform_device *pdev) wr_reg32(&ctrlpriv->qi->qi_control_lo, QICTL_DQEN); /* If QMAN driver is present, init CAAM-QI backend */ -#ifdef CONFIG_CAAM_QI +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI ret = caam_qi_init(pdev); if (ret) dev_err(dev, "caam qi i/f init failed: %d\n", ret); diff --git a/drivers/crypto/caam/debugfs.c b/drivers/crypto/caam/debugfs.c index 6358d3cabf57..718352b7afb5 100644
--- a/drivers/crypto/caam/debugfs.c +++ b/drivers/crypto/caam/debugfs.c @@ -22,7 +22,7 @@ static int caam_debugfs_u32_get(void *data, u64 *val) DEFINE_DEBUGFS_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); -#ifdef CONFIG_CAAM_QI +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI /* * This is a counter for the number of times the congestion group (where all * the request and response queueus are) reached congestion. Incremented diff --git a/drivers/crypto/caam/debugfs.h b/drivers/crypto/caam/debugfs.h index 8b5d1acd21a7..ef238c71f92a 100644 --- a/drivers/crypto/caam/debugfs.h +++ b/drivers/crypto/caam/debugfs.h @@ -18,7 +18,7 @@ static inline void caam_debugfs_init(struct caam_drv_private *ctrlpriv, {} #endif -#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_CAAM_QI) +#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI) void caam_debugfs_qi_congested(void); void caam_debugfs_qi_init(struct caam_drv_private *ctrlpriv); #else diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h index 51c90d17a40d..a88da0d31b23 100644 --- a/drivers/crypto/caam/intern.h +++ b/drivers/crypto/caam/intern.h @@ -227,7 +227,7 @@ static inline int caam_prng_register(struct device *dev) static inline void caam_prng_unregister(void *data) {} #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_PRNG_API */ -#ifdef CONFIG_CAAM_QI +#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI int caam_qi_algapi_init(struct device *dev); void caam_qi_algapi_exit(void); @@ -243,7 +243,7 @@ static inline void caam_qi_algapi_exit(void) { } -#endif /* CONFIG_CAAM_QI */ +#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API_QI */ static inline u64 caam_get_dma_mask(struct device *dev) { From e85334656836c161ee9d87b52ea09776828e7bcd Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:53:59 +0300 Subject: [PATCH 085/116] hwrng: drivers - Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy().
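As an illustration, a driver read path after this change looks roughly as follows. This is a hedged sketch for a hypothetical foo_rng driver; struct foo_rng and foo_rng_fill() are placeholder names, not taken from any of the drivers touched below:

#include <linux/hw_random.h>
#include <linux/pm_runtime.h>

struct foo_rng {
	struct device *dev;
	struct hwrng rng;
};

static int foo_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	struct foo_rng *priv = container_of(rng, struct foo_rng, rng);
	int ret;

	ret = pm_runtime_resume_and_get(priv->dev);
	if (ret < 0)
		return ret;

	ret = foo_rng_fill(priv, buf, max, wait);	/* placeholder */

	/*
	 * No pm_runtime_mark_last_busy() is needed here any more:
	 * the autosuspend put below now marks the device busy itself.
	 */
	pm_runtime_put_sync_autosuspend(priv->dev);

	return ret;
}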
Signed-off-by: Sakari Ailus Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Herbert Xu --- drivers/char/hw_random/atmel-rng.c | 1 - drivers/char/hw_random/cctrng.c | 1 - drivers/char/hw_random/mtk-rng.c | 1 - drivers/char/hw_random/npcm-rng.c | 1 - drivers/char/hw_random/omap3-rom-rng.c | 1 - drivers/char/hw_random/rockchip-rng.c | 3 --- drivers/char/hw_random/stm32-rng.c | 1 - 7 files changed, 9 deletions(-) diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index d2b00458761e..6ed24be3481d 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -80,7 +80,6 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max, ret = 4; out: - pm_runtime_mark_last_busy(trng->dev); pm_runtime_put_sync_autosuspend(trng->dev); return ret; } diff --git a/drivers/char/hw_random/cctrng.c b/drivers/char/hw_random/cctrng.c index 4db198849695..a5be9258037f 100644 --- a/drivers/char/hw_random/cctrng.c +++ b/drivers/char/hw_random/cctrng.c @@ -98,7 +98,6 @@ static void cc_trng_pm_put_suspend(struct device *dev) { int rc = 0; - pm_runtime_mark_last_busy(dev); rc = pm_runtime_put_autosuspend(dev); if (rc) dev_err(dev, "pm_runtime_put_autosuspend returned %x\n", rc); diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index d09a4d813766..5808d09d12c4 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -98,7 +98,6 @@ static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) max -= sizeof(u32); } - pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_sync_autosuspend(priv->dev); return retval || !wait ? retval : -EIO; diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c index 3e308c890bd2..40d6e29dea03 100644 --- a/drivers/char/hw_random/npcm-rng.c +++ b/drivers/char/hw_random/npcm-rng.c @@ -80,7 +80,6 @@ static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) max--; } - pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_sync_autosuspend(priv->dev); return retval || !wait ? retval : -EIO; diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index 8064c792caf0..aa71f61c3dc9 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -56,7 +56,6 @@ static int omap3_rom_rng_read(struct hwrng *rng, void *data, size_t max, bool w) else r = 4; - pm_runtime_mark_last_busy(ddata->dev); pm_runtime_put_autosuspend(ddata->dev); return r; diff --git a/drivers/char/hw_random/rockchip-rng.c b/drivers/char/hw_random/rockchip-rng.c index fb4a30b95507..6e3ed4b85605 100644 --- a/drivers/char/hw_random/rockchip-rng.c +++ b/drivers/char/hw_random/rockchip-rng.c @@ -223,7 +223,6 @@ static int rk3568_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) /* Read random data stored in the registers */ memcpy_fromio(buf, rk_rng->base + TRNG_RNG_DOUT, to_read); out: - pm_runtime_mark_last_busy(rk_rng->dev); pm_runtime_put_sync_autosuspend(rk_rng->dev); return (ret < 0) ? ret : to_read; @@ -263,7 +262,6 @@ static int rk3576_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) memcpy_fromio(buf, rk_rng->base + RKRNG_TRNG_DATA0, to_read); out: - pm_runtime_mark_last_busy(rk_rng->dev); pm_runtime_put_sync_autosuspend(rk_rng->dev); return (ret < 0) ? 
ret : to_read; @@ -355,7 +353,6 @@ static int rk3588_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) /* close the TRNG */ rk_rng_writel(rk_rng, TRNG_V1_CTRL_NOP, TRNG_V1_CTRL); - pm_runtime_mark_last_busy(rk_rng->dev); pm_runtime_put_sync_autosuspend(rk_rng->dev); return (ret < 0) ? ret : to_read; diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 98edbe796bc5..9a8c00586ab0 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -255,7 +255,6 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) } exit_rpm: - pm_runtime_mark_last_busy(priv->dev); pm_runtime_put_sync_autosuspend(priv->dev); return retval || !wait ? retval : -EIO; From 62842d290ee71feb89b2ed4fe810c55f06031ae4 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Fri, 4 Jul 2025 10:54:02 +0300 Subject: [PATCH 086/116] crypto: drivers - Remove redundant pm_runtime_mark_last_busy() calls pm_runtime_put_autosuspend(), pm_runtime_put_sync_autosuspend(), pm_runtime_autosuspend() and pm_request_autosuspend() now include a call to pm_runtime_mark_last_busy(). Remove the now-redundant explicit call to pm_runtime_mark_last_busy(). Signed-off-by: Sakari Ailus Signed-off-by: Herbert Xu --- drivers/crypto/ccree/cc_pm.c | 1 - drivers/crypto/hisilicon/qm.c | 1 - drivers/crypto/omap-aes-gcm.c | 1 - drivers/crypto/omap-aes.c | 1 - drivers/crypto/omap-des.c | 1 - drivers/crypto/omap-sham.c | 1 - drivers/crypto/stm32/stm32-cryp.c | 1 - drivers/crypto/stm32/stm32-hash.c | 1 - 8 files changed, 8 deletions(-) diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c index 6124fbbbed94..bbd118f8de0e 100644 --- a/drivers/crypto/ccree/cc_pm.c +++ b/drivers/crypto/ccree/cc_pm.c @@ -77,6 +77,5 @@ int cc_pm_get(struct device *dev) void cc_pm_put_suspend(struct device *dev) { - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 7c41f9593d03..2e4ee7ecfdfb 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -912,7 +912,6 @@ static void qm_pm_put_sync(struct hisi_qm *qm) if (!test_bit(QM_SUPPORT_RPM, &qm->caps)) return; - pm_runtime_mark_last_busy(dev); pm_runtime_put_autosuspend(dev); } diff --git a/drivers/crypto/omap-aes-gcm.c b/drivers/crypto/omap-aes-gcm.c index c498950402e8..1f4586509ca4 100644 --- a/drivers/crypto/omap-aes-gcm.c +++ b/drivers/crypto/omap-aes-gcm.c @@ -38,7 +38,6 @@ static void omap_aes_gcm_finish_req(struct omap_aes_dev *dd, int ret) crypto_finalize_aead_request(dd->engine, req, ret); - pm_runtime_mark_last_busy(dd->dev); pm_runtime_put_autosuspend(dd->dev); } diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 1ecf5f6ac04e..244e24e52987 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -400,7 +400,6 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) crypto_finalize_skcipher_request(dd->engine, req, err); - pm_runtime_mark_last_busy(dd->dev); pm_runtime_put_autosuspend(dd->dev); } diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index a099460d5f21..9c5538ae17db 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -489,7 +489,6 @@ static void omap_des_finish_req(struct omap_des_dev *dd, int err) crypto_finalize_skcipher_request(dd->engine, req, err); - pm_runtime_mark_last_busy(dd->dev); pm_runtime_put_autosuspend(dd->dev); } diff --git a/drivers/crypto/omap-sham.c
b/drivers/crypto/omap-sham.c index 56f192cb976d..6328e8026b91 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1167,7 +1167,6 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_mark_last_busy(dd->dev); pm_runtime_put_autosuspend(dd->dev); ctx->offset = 0; diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 5ce88e7a8f65..a89b4c5d62a0 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -851,7 +851,6 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) if (!err && (!(is_gcm(cryp) || is_ccm(cryp) || is_ecb(cryp)))) stm32_cryp_get_iv(cryp); - pm_runtime_mark_last_busy(cryp->dev); pm_runtime_put_autosuspend(cryp->dev); if (is_gcm(cryp) || is_ccm(cryp)) diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 768b27de4737..a4436728b0db 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -1373,7 +1373,6 @@ static void stm32_hash_unprepare_request(struct ahash_request *req) *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); pm_runtime: - pm_runtime_mark_last_busy(hdev->dev); pm_runtime_put_autosuspend(hdev->dev); } From e83cfb8ff1433cc832d31b8cac967a1eb79d5b44 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Mon, 7 Jul 2025 09:54:17 +0100 Subject: [PATCH 087/116] crypto: qat - fix virtual channel configuration for GEN6 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The TCVCMAP (Traffic Class to Virtual Channel Mapping) field in the PVC0CTL and PVC1CTL registers controls how traffic classes are mapped to virtual channels in QAT GEN6 hardware. The driver previously wrote a default TCVCMAP value to these registers, but this configuration was incorrect. Modify the TCVCMAP configuration to explicitly enable both VC0 and VC1, and map Traffic Classes 0 to 7 → VC0 and Traffic Class 8 → VC1. Replace FIELD_PREP() with FIELD_MODIFY() to ensure that only the intended TCVCMAP field is updated, preserving other bits in the register. This prevents unintended overwrites of unrelated configuration fields when modifying TC to VC mappings. Fixes: 17fd7514ae68 ("crypto: qat - add qat_6xxx driver") Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 10 +++++----- drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index 4d93d5a56ba3..a21a10a8338f 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -532,8 +532,8 @@ static void set_vc_csr_for_bank(void __iomem *csr, u32 bank_number) * driver must program the ringmodectl CSRs.
*/ value = ADF_CSR_RD(csr, ADF_GEN6_CSR_RINGMODECTL(bank_number)); - value |= FIELD_PREP(ADF_GEN6_RINGMODECTL_TC_MASK, ADF_GEN6_RINGMODECTL_TC_DEFAULT); - value |= FIELD_PREP(ADF_GEN6_RINGMODECTL_TC_EN_MASK, ADF_GEN6_RINGMODECTL_TC_EN_OP1); + FIELD_MODIFY(ADF_GEN6_RINGMODECTL_TC_MASK, &value, ADF_GEN6_RINGMODECTL_TC_DEFAULT); + FIELD_MODIFY(ADF_GEN6_RINGMODECTL_TC_EN_MASK, &value, ADF_GEN6_RINGMODECTL_TC_EN_OP1); ADF_CSR_WR(csr, ADF_GEN6_CSR_RINGMODECTL(bank_number), value); } @@ -549,7 +549,7 @@ static int set_vc_config(struct adf_accel_dev *accel_dev) * Read PVC0CTL then write the masked values. */ pci_read_config_dword(pdev, ADF_GEN6_PVC0CTL_OFFSET, &value); - value |= FIELD_PREP(ADF_GEN6_PVC0CTL_TCVCMAP_MASK, ADF_GEN6_PVC0CTL_TCVCMAP_DEFAULT); + FIELD_MODIFY(ADF_GEN6_PVC0CTL_TCVCMAP_MASK, &value, ADF_GEN6_PVC0CTL_TCVCMAP_DEFAULT); err = pci_write_config_dword(pdev, ADF_GEN6_PVC0CTL_OFFSET, value); if (err) { dev_err(&GET_DEV(accel_dev), "pci write to PVC0CTL failed\n"); @@ -558,8 +558,8 @@ static int set_vc_config(struct adf_accel_dev *accel_dev) /* Read PVC1CTL then write masked values */ pci_read_config_dword(pdev, ADF_GEN6_PVC1CTL_OFFSET, &value); - value |= FIELD_PREP(ADF_GEN6_PVC1CTL_TCVCMAP_MASK, ADF_GEN6_PVC1CTL_TCVCMAP_DEFAULT); - value |= FIELD_PREP(ADF_GEN6_PVC1CTL_VCEN_MASK, ADF_GEN6_PVC1CTL_VCEN_ON); + FIELD_MODIFY(ADF_GEN6_PVC1CTL_TCVCMAP_MASK, &value, ADF_GEN6_PVC1CTL_TCVCMAP_DEFAULT); + FIELD_MODIFY(ADF_GEN6_PVC1CTL_VCEN_MASK, &value, ADF_GEN6_PVC1CTL_VCEN_ON); err = pci_write_config_dword(pdev, ADF_GEN6_PVC1CTL_OFFSET, value); if (err) dev_err(&GET_DEV(accel_dev), "pci write to PVC1CTL failed\n"); diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h index 78e2e2c5816e..8824958527c4 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h @@ -99,7 +99,7 @@ #define ADF_GEN6_PVC0CTL_OFFSET 0x204 #define ADF_GEN6_PVC0CTL_TCVCMAP_OFFSET 1 #define ADF_GEN6_PVC0CTL_TCVCMAP_MASK GENMASK(7, 1) -#define ADF_GEN6_PVC0CTL_TCVCMAP_DEFAULT 0x7F +#define ADF_GEN6_PVC0CTL_TCVCMAP_DEFAULT 0x3F /* VC1 Resource Control Register */ #define ADF_GEN6_PVC1CTL_OFFSET 0x210 From 7c68005a46108ffaa598e91f1571e5f7f9acb6dc Mon Sep 17 00:00:00 2001 From: George Abraham P Date: Mon, 7 Jul 2025 13:28:45 +0100 Subject: [PATCH 088/116] crypto: qat - relocate power management debugfs helper APIs Relocate the power management debugfs helper APIs to a common pair of files, adf_pm_dbgfs_utils.h and adf_pm_dbgfs_utils.c, so that they can be shared between device generations. When moving logic from adf_gen4_pm_debugfs.c to adf_pm_dbgfs_utils.c, the kernel.h include has been replaced with the required includes. This does not introduce any functional change.
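To make the intended reuse concrete, a generation-specific pm_status printer can now declare its row tables and feed them to the shared print helpers. The sketch below is illustrative only: EXAMPLE_FIELD and its mask are made-up names; only the helper, macro and struct names come from the new adf_pm_dbgfs_utils files:

#include <linux/bits.h>
#include "adf_pm_dbgfs_utils.h"

/* Hypothetical field; the macro stringifies it into the printed key. */
static const struct pm_status_row example_rows[] = {
	PM_INFO_REGSET_ENTRY_MASK(pm.status, EXAMPLE_FIELD, GENMASK(3, 0)),
};

/* Emits one "example_field: <value>" line per row into buf. */
static int example_print(char *buf, size_t size, u32 *pm_info_regs)
{
	return adf_pm_scnprint_table_lower_keys(buf, example_rows,
						pm_info_regs, size,
						ARRAY_SIZE(example_rows));
}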
Signed-off-by: George Abraham P Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/Makefile | 1 + .../qat/qat_common/adf_gen4_pm_debugfs.c | 105 ++++-------------- .../intel/qat/qat_common/adf_pm_dbgfs_utils.c | 52 +++++++++ .../intel/qat/qat_common/adf_pm_dbgfs_utils.h | 36 ++++++ 4 files changed, 108 insertions(+), 86 deletions(-) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.h diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index e426cc3c49c3..5826180c2051 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -52,6 +52,7 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_cnv_dbgfs.o \ adf_heartbeat_dbgfs.o \ adf_heartbeat.o \ adf_pm_dbgfs.o \ + adf_pm_dbgfs_utils.o \ adf_telemetry.o \ adf_tl_debugfs.o \ adf_transport_debug.o diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pm_debugfs.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_pm_debugfs.c index 2e4095c4c12c..b7e38842a46d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pm_debugfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pm_debugfs.c @@ -1,47 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2023 Intel Corporation */ #include -#include #include -#include #include "adf_accel_devices.h" #include "adf_admin.h" #include "adf_common_drv.h" #include "adf_gen4_pm.h" +#include "adf_pm_dbgfs_utils.h" #include "icp_qat_fw_init_admin.h" -/* - * This is needed because a variable is used to index the mask at - * pm_scnprint_table(), making it not compile time constant, so the compile - * asserts from FIELD_GET() or u32_get_bits() won't be fulfilled. 
- */ -#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) - -#define PM_INFO_MEMBER_OFF(member) \ - (offsetof(struct icp_qat_fw_init_admin_pm_info, member) / sizeof(u32)) - -#define PM_INFO_REGSET_ENTRY_MASK(_reg_, _field_, _mask_) \ -{ \ - .reg_offset = PM_INFO_MEMBER_OFF(_reg_), \ - .key = __stringify(_field_), \ - .field_mask = _mask_, \ -} - -#define PM_INFO_REGSET_ENTRY32(_reg_, _field_) \ - PM_INFO_REGSET_ENTRY_MASK(_reg_, _field_, GENMASK(31, 0)) - #define PM_INFO_REGSET_ENTRY(_reg_, _field_) \ PM_INFO_REGSET_ENTRY_MASK(_reg_, _field_, ADF_GEN4_PM_##_field_##_MASK) -#define PM_INFO_MAX_KEY_LEN 21 - -struct pm_status_row { - int reg_offset; - u32 field_mask; - const char *key; -}; - static const struct pm_status_row pm_fuse_rows[] = { PM_INFO_REGSET_ENTRY(fusectl0, ENABLE_PM), PM_INFO_REGSET_ENTRY(fusectl0, ENABLE_PM_IDLE), @@ -109,44 +80,6 @@ static const struct pm_status_row pm_csrs_rows[] = { PM_INFO_REGSET_ENTRY32(pm.pwrreq, CPM_PM_PWRREQ), }; -static int pm_scnprint_table(char *buff, const struct pm_status_row *table, - u32 *pm_info_regs, size_t buff_size, int table_len, - bool lowercase) -{ - char key[PM_INFO_MAX_KEY_LEN]; - int wr = 0; - int i; - - for (i = 0; i < table_len; i++) { - if (lowercase) - string_lower(key, table[i].key); - else - string_upper(key, table[i].key); - - wr += scnprintf(&buff[wr], buff_size - wr, "%s: %#x\n", key, - field_get(table[i].field_mask, - pm_info_regs[table[i].reg_offset])); - } - - return wr; -} - -static int pm_scnprint_table_upper_keys(char *buff, const struct pm_status_row *table, - u32 *pm_info_regs, size_t buff_size, - int table_len) -{ - return pm_scnprint_table(buff, table, pm_info_regs, buff_size, - table_len, false); -} - -static int pm_scnprint_table_lower_keys(char *buff, const struct pm_status_row *table, - u32 *pm_info_regs, size_t buff_size, - int table_len) -{ - return pm_scnprint_table(buff, table, pm_info_regs, buff_size, - table_len, true); -} - static_assert(sizeof(struct icp_qat_fw_init_admin_pm_info) < PAGE_SIZE); static ssize_t adf_gen4_print_pm_status(struct adf_accel_dev *accel_dev, @@ -191,9 +124,9 @@ static ssize_t adf_gen4_print_pm_status(struct adf_accel_dev *accel_dev, /* Fusectl related */ len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "----------- PM Fuse info ---------\n"); - len += pm_scnprint_table_lower_keys(&pm_kv[len], pm_fuse_rows, - pm_info_regs, PAGE_SIZE - len, - ARRAY_SIZE(pm_fuse_rows)); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_fuse_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_fuse_rows)); len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "max_pwrreq: %#x\n", pm_info->max_pwrreq); len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "min_pwrreq: %#x\n", @@ -204,28 +137,28 @@ static ssize_t adf_gen4_print_pm_status(struct adf_accel_dev *accel_dev, "------------ PM Info ------------\n"); len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "power_level: %s\n", pm_info->pwr_state == PM_SET_MIN ? 
"min" : "max"); - len += pm_scnprint_table_lower_keys(&pm_kv[len], pm_info_rows, - pm_info_regs, PAGE_SIZE - len, - ARRAY_SIZE(pm_info_rows)); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_info_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_info_rows)); len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "pm_mode: STATIC\n"); /* SSM related */ len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "----------- SSM_PM Info ----------\n"); - len += pm_scnprint_table_lower_keys(&pm_kv[len], pm_ssm_rows, - pm_info_regs, PAGE_SIZE - len, - ARRAY_SIZE(pm_ssm_rows)); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_ssm_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_ssm_rows)); /* Log related */ len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "------------- PM Log -------------\n"); - len += pm_scnprint_table_lower_keys(&pm_kv[len], pm_log_rows, - pm_info_regs, PAGE_SIZE - len, - ARRAY_SIZE(pm_log_rows)); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_log_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_log_rows)); - len += pm_scnprint_table_lower_keys(&pm_kv[len], pm_event_rows, - pm_info_regs, PAGE_SIZE - len, - ARRAY_SIZE(pm_event_rows)); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_event_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_event_rows)); len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "idle_irq_count: %#x\n", pm->idle_irq_counters); @@ -241,9 +174,9 @@ static ssize_t adf_gen4_print_pm_status(struct adf_accel_dev *accel_dev, /* CSRs content */ len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "----------- HW PM CSRs -----------\n"); - len += pm_scnprint_table_upper_keys(&pm_kv[len], pm_csrs_rows, - pm_info_regs, PAGE_SIZE - len, - ARRAY_SIZE(pm_csrs_rows)); + len += adf_pm_scnprint_table_upper_keys(&pm_kv[len], pm_csrs_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_csrs_rows)); val = ADF_CSR_RD(pmisc, ADF_GEN4_PM_HOST_MSG); len += scnprintf(&pm_kv[len], PAGE_SIZE - len, diff --git a/drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.c b/drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.c new file mode 100644 index 000000000000..69295a9ddf0a --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 Intel Corporation */ +#include +#include +#include + +#include "adf_pm_dbgfs_utils.h" + +/* + * This is needed because a variable is used to index the mask at + * pm_scnprint_table(), making it not compile time constant, so the compile + * asserts from FIELD_GET() or u32_get_bits() won't be fulfilled. 
+ */ +#define field_get(_mask, _reg) (((_reg) & (_mask)) >> (ffs(_mask) - 1)) + +#define PM_INFO_MAX_KEY_LEN 21 + +static int pm_scnprint_table(char *buff, const struct pm_status_row *table, + u32 *pm_info_regs, size_t buff_size, int table_len, + bool lowercase) +{ + char key[PM_INFO_MAX_KEY_LEN]; + int wr = 0; + int i; + + for (i = 0; i < table_len; i++) { + if (lowercase) + string_lower(key, table[i].key); + else + string_upper(key, table[i].key); + + wr += scnprintf(&buff[wr], buff_size - wr, "%s: %#x\n", key, + field_get(table[i].field_mask, + pm_info_regs[table[i].reg_offset])); + } + + return wr; +} + +int adf_pm_scnprint_table_upper_keys(char *buff, const struct pm_status_row *table, + u32 *pm_info_regs, size_t buff_size, int table_len) +{ + return pm_scnprint_table(buff, table, pm_info_regs, buff_size, + table_len, false); +} + +int adf_pm_scnprint_table_lower_keys(char *buff, const struct pm_status_row *table, + u32 *pm_info_regs, size_t buff_size, int table_len) +{ + return pm_scnprint_table(buff, table, pm_info_regs, buff_size, + table_len, true); +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.h b/drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.h new file mode 100644 index 000000000000..854f058b35ed --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_pm_dbgfs_utils.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2025 Intel Corporation */ +#ifndef ADF_PM_DBGFS_UTILS_H_ +#define ADF_PM_DBGFS_UTILS_H_ + +#include +#include +#include +#include "icp_qat_fw_init_admin.h" + +#define PM_INFO_MEMBER_OFF(member) \ + (offsetof(struct icp_qat_fw_init_admin_pm_info, member) / sizeof(u32)) + +#define PM_INFO_REGSET_ENTRY_MASK(_reg_, _field_, _mask_) \ +{ \ + .reg_offset = PM_INFO_MEMBER_OFF(_reg_), \ + .key = __stringify(_field_), \ + .field_mask = _mask_, \ +} + +#define PM_INFO_REGSET_ENTRY32(_reg_, _field_) \ + PM_INFO_REGSET_ENTRY_MASK(_reg_, _field_, GENMASK(31, 0)) + +struct pm_status_row { + int reg_offset; + u32 field_mask; + const char *key; +}; + +int adf_pm_scnprint_table_upper_keys(char *buff, const struct pm_status_row *table, + u32 *pm_info_regs, size_t buff_size, int table_len); + +int adf_pm_scnprint_table_lower_keys(char *buff, const struct pm_status_row *table, + u32 *pm_info_regs, size_t buff_size, int table_len); + +#endif /* ADF_PM_DBGFS_UTILS_H_ */ From c963ff0ec45a4eef7fad8a741848af9a888a0863 Mon Sep 17 00:00:00 2001 From: George Abraham P Date: Mon, 7 Jul 2025 13:28:46 +0100 Subject: [PATCH 089/116] crypto: qat - enable power management debugfs for GEN6 devices The QAT driver includes infrastructure to report power management (PM) information via debugfs. Extend this support to QAT GEN6 devices by exposing PM debug data through the `pm_status` file. This implementation reports the current PM state, power management hardware control and status registers (CSR), and per-domain power status specific to the QAT GEN6 architecture. The debug functionality is implemented in adf_gen6_pm_dbgfs.c and initialized as part of the enable_pm() function. 
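Once adf_gen6_init_dev_pm_data() has installed the callback (see the end of the new file below), the generic pm_status read side only needs to dispatch through it. A simplified sketch of that dispatch, assuming the general shape of the read handler rather than quoting the actual adf_pm_dbgfs.c code:

static ssize_t pm_status_read(struct file *f, char __user *buf,
			      size_t count, loff_t *pos)
{
	struct adf_accel_dev *accel_dev = f->private_data;

	/* Set to true by adf_gen6_init_dev_pm_data() on GEN6 devices. */
	if (!accel_dev->power_management.present)
		return -EOPNOTSUPP;

	return accel_dev->power_management.print_pm_status(accel_dev, buf,
							   count, pos);
}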
Co-developed-by: Vijay Sundar Selvamani Signed-off-by: Vijay Sundar Selvamani Signed-off-by: George Abraham P Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- Documentation/ABI/testing/debugfs-driver-qat | 2 +- .../intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 11 +- drivers/crypto/intel/qat/qat_common/Makefile | 1 + .../crypto/intel/qat/qat_common/adf_gen6_pm.h | 24 ++++ .../intel/qat/qat_common/adf_gen6_pm_dbgfs.c | 124 ++++++++++++++++++ 5 files changed, 160 insertions(+), 2 deletions(-) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen6_pm_dbgfs.c diff --git a/Documentation/ABI/testing/debugfs-driver-qat b/Documentation/ABI/testing/debugfs-driver-qat index bd6793760f29..3f1efbbad6ca 100644 --- a/Documentation/ABI/testing/debugfs-driver-qat +++ b/Documentation/ABI/testing/debugfs-driver-qat @@ -67,7 +67,7 @@ Contact: qat-linux@intel.com Description: (RO) Read returns power management information specific to the QAT device. - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. What: /sys/kernel/debug/qat__/cnv_errors Date: January 2024 diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index a21a10a8338f..ecef3dc28a91 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -763,7 +763,16 @@ static int adf_init_device(struct adf_accel_dev *accel_dev) static int enable_pm(struct adf_accel_dev *accel_dev) { - return adf_init_admin_pm(accel_dev, ADF_GEN6_PM_DEFAULT_IDLE_FILTER); + int ret; + + ret = adf_init_admin_pm(accel_dev, ADF_GEN6_PM_DEFAULT_IDLE_FILTER); + if (ret) + return ret; + + /* Initialize PM internal data */ + adf_gen6_init_dev_pm_data(accel_dev); + + return 0; } static int dev_config(struct adf_accel_dev *accel_dev) diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 5826180c2051..34019d8637a5 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -49,6 +49,7 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_cnv_dbgfs.o \ adf_fw_counters.o \ adf_gen4_pm_debugfs.o \ adf_gen4_tl.o \ + adf_gen6_pm_dbgfs.o \ adf_heartbeat_dbgfs.o \ adf_heartbeat.o \ adf_pm_dbgfs.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h b/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h index 9a5b995f7ada..4c0d576e8c21 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_pm.h @@ -24,5 +24,29 @@ struct adf_accel_dev; /* cpm_pm_status bitfields */ #define ADF_GEN6_PM_INIT_STATE BIT(21) +#define ADF_GEN6_PM_CPM_PM_STATE_MASK GENMASK(22, 20) + +/* fusectl0 bitfields */ +#define ADF_GEN6_PM_ENABLE_PM_MASK BIT(21) +#define ADF_GEN6_PM_ENABLE_PM_IDLE_MASK BIT(22) +#define ADF_GEN6_PM_ENABLE_DEEP_PM_IDLE_MASK BIT(23) + +/* cpm_pm_fw_init bitfields */ +#define ADF_GEN6_PM_IDLE_FILTER_MASK GENMASK(5, 3) +#define ADF_GEN6_PM_IDLE_ENABLE_MASK BIT(2) + +/* ssm_pm_enable bitfield */ +#define ADF_GEN6_PM_SSM_PM_ENABLE_MASK BIT(0) + +/* ssm_pm_domain_status bitfield */ +#define ADF_GEN6_PM_DOMAIN_POWERED_UP_MASK BIT(0) + +#ifdef CONFIG_DEBUG_FS +void adf_gen6_init_dev_pm_data(struct adf_accel_dev *accel_dev); +#else +static inline void adf_gen6_init_dev_pm_data(struct adf_accel_dev *accel_dev) +{ +} +#endif /* CONFIG_DEBUG_FS */ #endif /* ADF_GEN6_PM_H */ diff --git 
a/drivers/crypto/intel/qat/qat_common/adf_gen6_pm_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_gen6_pm_dbgfs.c new file mode 100644 index 000000000000..603aefba0fdb --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_pm_dbgfs.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 Intel Corporation */ +#include +#include +#include + +#include "adf_admin.h" +#include "adf_common_drv.h" +#include "adf_gen6_pm.h" +#include "adf_pm_dbgfs_utils.h" +#include "icp_qat_fw_init_admin.h" + +#define PM_INFO_REGSET_ENTRY(_reg_, _field_) \ + PM_INFO_REGSET_ENTRY_MASK(_reg_, _field_, ADF_GEN6_PM_##_field_##_MASK) + +static struct pm_status_row pm_fuse_rows[] = { + PM_INFO_REGSET_ENTRY(fusectl0, ENABLE_PM), + PM_INFO_REGSET_ENTRY(fusectl0, ENABLE_PM_IDLE), + PM_INFO_REGSET_ENTRY(fusectl0, ENABLE_DEEP_PM_IDLE), +}; + +static struct pm_status_row pm_info_rows[] = { + PM_INFO_REGSET_ENTRY(pm.status, CPM_PM_STATE), + PM_INFO_REGSET_ENTRY(pm.fw_init, IDLE_ENABLE), + PM_INFO_REGSET_ENTRY(pm.fw_init, IDLE_FILTER), +}; + +static struct pm_status_row pm_ssm_rows[] = { + PM_INFO_REGSET_ENTRY(ssm.pm_enable, SSM_PM_ENABLE), + PM_INFO_REGSET_ENTRY(ssm.pm_domain_status, DOMAIN_POWERED_UP), +}; + +static struct pm_status_row pm_csrs_rows[] = { + PM_INFO_REGSET_ENTRY32(pm.fw_init, CPM_PM_FW_INIT), + PM_INFO_REGSET_ENTRY32(pm.status, CPM_PM_STATUS), +}; + +static_assert(sizeof(struct icp_qat_fw_init_admin_pm_info) < PAGE_SIZE); + +static ssize_t adf_gen6_print_pm_status(struct adf_accel_dev *accel_dev, + char __user *buf, size_t count, + loff_t *pos) +{ + void __iomem *pmisc = adf_get_pmisc_base(accel_dev); + struct icp_qat_fw_init_admin_pm_info *pm_info; + dma_addr_t p_state_addr; + u32 *pm_info_regs; + size_t len = 0; + char *pm_kv; + u32 val; + int ret; + + pm_info = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!pm_info) + return -ENOMEM; + + pm_kv = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!pm_kv) { + kfree(pm_info); + return -ENOMEM; + } + + p_state_addr = dma_map_single(&GET_DEV(accel_dev), pm_info, PAGE_SIZE, + DMA_FROM_DEVICE); + ret = dma_mapping_error(&GET_DEV(accel_dev), p_state_addr); + if (ret) + goto out_free; + + /* Query power management information from QAT FW */ + ret = adf_get_pm_info(accel_dev, p_state_addr, PAGE_SIZE); + dma_unmap_single(&GET_DEV(accel_dev), p_state_addr, PAGE_SIZE, + DMA_FROM_DEVICE); + if (ret) + goto out_free; + + pm_info_regs = (u32 *)pm_info; + + /* Fuse control register */ + len += scnprintf(&pm_kv[len], PAGE_SIZE - len, + "----------- PM Fuse info ---------\n"); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_fuse_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_fuse_rows)); + + /* Power management */ + len += scnprintf(&pm_kv[len], PAGE_SIZE - len, + "----------- PM Info --------------\n"); + + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_info_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_info_rows)); + len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "pm_mode: ACTIVE\n"); + + /* Shared Slice Module */ + len += scnprintf(&pm_kv[len], PAGE_SIZE - len, + "----------- SSM_PM Info ----------\n"); + len += adf_pm_scnprint_table_lower_keys(&pm_kv[len], pm_ssm_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_ssm_rows)); + + /* Control status register content */ + len += scnprintf(&pm_kv[len], PAGE_SIZE - len, + "----------- HW PM CSRs -----------\n"); + len += adf_pm_scnprint_table_upper_keys(&pm_kv[len], pm_csrs_rows, + pm_info_regs, PAGE_SIZE - len, + ARRAY_SIZE(pm_csrs_rows)); + + val = ADF_CSR_RD(pmisc, 
ADF_GEN6_PM_INTERRUPT); + len += scnprintf(&pm_kv[len], PAGE_SIZE - len, "CPM_PM_INTERRUPT: %#x\n", val); + ret = simple_read_from_buffer(buf, count, pos, pm_kv, len); + +out_free: + kfree(pm_info); + kfree(pm_kv); + + return ret; +} + +void adf_gen6_init_dev_pm_data(struct adf_accel_dev *accel_dev) +{ + accel_dev->power_management.print_pm_status = adf_gen6_print_pm_status; + accel_dev->power_management.present = true; +} +EXPORT_SYMBOL_GPL(adf_gen6_init_dev_pm_data); From c7f49dadfcdf27e1f747442e874e9baa52ab7674 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 8 Jul 2025 12:38:28 -0700 Subject: [PATCH 090/116] crypto: x86/aegis - Fix sleeping when disallowed on PREEMPT_RT skcipher_walk_done() can call kfree(), which takes a spinlock, which makes it incorrect to call while preemption is disabled on PREEMPT_RT. Therefore, end the kernel-mode FPU section before calling skcipher_walk_done(), and restart it afterwards. Moreover, pass atomic=false to skcipher_walk_aead_encrypt() instead of atomic=true. The point of atomic=true was to make skcipher_walk_done() safe to call while in a kernel-mode FPU section, but that does not actually work. So just use the usual atomic=false. Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-glue.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index f1b6d40154e3..3cb5c193038b 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -119,7 +119,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state, walk->dst.virt.addr, round_down(walk->nbytes, AEGIS128_BLOCK_SIZE)); + kernel_fpu_end(); skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); + kernel_fpu_begin(); } if (walk->nbytes) { @@ -131,7 +133,9 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state, aegis128_aesni_dec_tail(state, walk->src.virt.addr, walk->dst.virt.addr, walk->nbytes); + kernel_fpu_end(); skcipher_walk_done(walk, 0); + kernel_fpu_begin(); } } @@ -176,9 +180,9 @@ crypto_aegis128_aesni_crypt(struct aead_request *req, struct aegis_state state; if (enc) - skcipher_walk_aead_encrypt(&walk, req, true); + skcipher_walk_aead_encrypt(&walk, req, false); else - skcipher_walk_aead_decrypt(&walk, req, true); + skcipher_walk_aead_decrypt(&walk, req, false); kernel_fpu_begin(); From 3d9eb180fbe8828cce43bce4c370124685b205c3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 8 Jul 2025 12:38:29 -0700 Subject: [PATCH 091/116] crypto: x86/aegis - Add missing error checks The skcipher_walk functions can allocate memory and can fail, so checking for errors is necessary. 
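Taken together with the PREEMPT_RT fix above, the resulting pattern can be distilled as the hedged sketch below, where process_blocks() and finalize() are placeholders for the SIMD work rather than the actual aegis helpers: leave the kernel-mode FPU section before calling skcipher_walk_done(), which may sleep or fail, re-enter it afterwards, and propagate the error:

static int walk_and_crypt(struct aegis_state *state,
			  struct skcipher_walk *walk)
{
	int err = 0;

	kernel_fpu_begin();
	while (walk->nbytes >= AEGIS128_BLOCK_SIZE) {
		process_blocks(state, walk);	/* SIMD work on full blocks */
		kernel_fpu_end();
		/* May allocate or free memory; must not run under the FPU. */
		err = skcipher_walk_done(walk,
					 walk->nbytes % AEGIS128_BLOCK_SIZE);
		kernel_fpu_begin();
		if (err)
			break;			/* walk->nbytes is now 0 */
	}
	if (!err)
		finalize(state);	/* placeholder for the tag computation */
	kernel_fpu_end();
	return err;
}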
Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/x86/crypto/aegis128-aesni-glue.c | 36 +++++++++++++++++++-------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c index 3cb5c193038b..f1adfba1a76e 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -104,10 +104,12 @@ static void crypto_aegis128_aesni_process_ad( } } -static __always_inline void +static __always_inline int crypto_aegis128_aesni_process_crypt(struct aegis_state *state, struct skcipher_walk *walk, bool enc) { + int err = 0; + while (walk->nbytes >= AEGIS128_BLOCK_SIZE) { if (enc) aegis128_aesni_enc(state, walk->src.virt.addr, @@ -120,7 +122,8 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state, round_down(walk->nbytes, AEGIS128_BLOCK_SIZE)); kernel_fpu_end(); - skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); + err = skcipher_walk_done(walk, + walk->nbytes % AEGIS128_BLOCK_SIZE); kernel_fpu_begin(); } @@ -134,9 +137,10 @@ crypto_aegis128_aesni_process_crypt(struct aegis_state *state, walk->dst.virt.addr, walk->nbytes); kernel_fpu_end(); - skcipher_walk_done(walk, 0); + err = skcipher_walk_done(walk, 0); kernel_fpu_begin(); } + return err; } static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead) @@ -169,7 +173,7 @@ static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm, return 0; } -static __always_inline void +static __always_inline int crypto_aegis128_aesni_crypt(struct aead_request *req, struct aegis_block *tag_xor, unsigned int cryptlen, bool enc) @@ -178,20 +182,24 @@ crypto_aegis128_aesni_crypt(struct aead_request *req, struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm); struct skcipher_walk walk; struct aegis_state state; + int err; if (enc) - skcipher_walk_aead_encrypt(&walk, req, false); + err = skcipher_walk_aead_encrypt(&walk, req, false); else - skcipher_walk_aead_decrypt(&walk, req, false); + err = skcipher_walk_aead_decrypt(&walk, req, false); + if (err) + return err; kernel_fpu_begin(); aegis128_aesni_init(&state, &ctx->key, req->iv); crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_aesni_process_crypt(&state, &walk, enc); - aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); - + err = crypto_aegis128_aesni_process_crypt(&state, &walk, enc); + if (err == 0) + aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); kernel_fpu_end(); + return err; } static int crypto_aegis128_aesni_encrypt(struct aead_request *req) @@ -200,8 +208,11 @@ static int crypto_aegis128_aesni_encrypt(struct aead_request *req) struct aegis_block tag = {}; unsigned int authsize = crypto_aead_authsize(tfm); unsigned int cryptlen = req->cryptlen; + int err; - crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true); + err = crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true); + if (err) + return err; scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, authsize, 1); @@ -216,11 +227,14 @@ static int crypto_aegis128_aesni_decrypt(struct aead_request *req) struct aegis_block tag; unsigned int authsize = crypto_aead_authsize(tfm); unsigned int cryptlen = req->cryptlen - authsize; + int err; scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen, authsize, 0); - crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false); + err = 
crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false); + if (err) + return err; return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; } From 962ddc5a7a4b04c007bba0f3e7298cda13c62efd Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 8 Jul 2025 17:59:54 -0700 Subject: [PATCH 092/116] crypto: acomp - Fix CFI failure due to type punning To avoid a crash when control flow integrity is enabled, make the workspace ("stream") free function use a consistent type, and call it through a function pointer that has that same type. Fixes: 42d9f6c77479 ("crypto: acomp - Move scomp stream allocation code into acomp") Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- crypto/deflate.c | 7 ++++++- crypto/zstd.c | 7 ++++++- include/crypto/internal/acompress.h | 5 +---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/crypto/deflate.c b/crypto/deflate.c index fe8e4ad0fee1..21404515dc77 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -48,9 +48,14 @@ static void *deflate_alloc_stream(void) return ctx; } +static void deflate_free_stream(void *ctx) +{ + kvfree(ctx); +} + static struct crypto_acomp_streams deflate_streams = { .alloc_ctx = deflate_alloc_stream, - .cfree_ctx = kvfree, + .free_ctx = deflate_free_stream, }; static int deflate_compress_one(struct acomp_req *req, diff --git a/crypto/zstd.c b/crypto/zstd.c index ebeadc1f3b5f..c2a19cb0879d 100644 --- a/crypto/zstd.c +++ b/crypto/zstd.c @@ -54,9 +54,14 @@ static void *zstd_alloc_stream(void) return ctx; } +static void zstd_free_stream(void *ctx) +{ + kvfree(ctx); +} + static struct crypto_acomp_streams zstd_streams = { .alloc_ctx = zstd_alloc_stream, - .cfree_ctx = kvfree, + .free_ctx = zstd_free_stream, }; static int zstd_init(struct crypto_acomp *acomp_tfm) diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h index ffffd88bbbad..2d97440028ff 100644 --- a/include/crypto/internal/acompress.h +++ b/include/crypto/internal/acompress.h @@ -63,10 +63,7 @@ struct crypto_acomp_stream { struct crypto_acomp_streams { /* These must come first because of struct scomp_alg. */ void *(*alloc_ctx)(void); - union { - void (*free_ctx)(void *); - void (*cfree_ctx)(const void *); - }; + void (*free_ctx)(void *); struct crypto_acomp_stream __percpu *streams; struct work_struct stream_work; From b19f1ab8d5bf417e00d5855c62e061fb449b13c5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 9 Jul 2025 00:11:40 -0700 Subject: [PATCH 093/116] crypto: krb5 - Fix memory leak in krb5_test_one_prf() Fix a leak reported by kmemleak: unreferenced object 0xffff8880093bf7a0 (size 32): comm "swapper/0", pid 1, jiffies 4294877529 hex dump (first 32 bytes): 9d 18 86 16 f6 38 52 fe 86 91 5b b8 40 b4 a8 86 .....8R...[.@... ff 3e 6b b0 f8 19 b4 9b 89 33 93 d3 93 85 42 95 .>k......3....B. 
backtrace (crc 8ba12f3b): kmemleak_alloc+0x8d/0xa0 __kmalloc_noprof+0x3cd/0x4d0 prep_buf+0x36/0x70 load_buf+0x10d/0x1c0 krb5_test_one_prf+0x1e1/0x3c0 krb5_selftest.cold+0x7c/0x54c crypto_krb5_init+0xd/0x20 do_one_initcall+0xa5/0x230 do_initcalls+0x213/0x250 kernel_init_freeable+0x220/0x260 kernel_init+0x1d/0x170 ret_from_fork+0x301/0x410 ret_from_fork_asm+0x1a/0x30 Fixes: fc0cf10c04f4 ("crypto/krb5: Implement crypto self-testing") Signed-off-by: Eric Biggers Acked-by: David Howells Signed-off-by: Herbert Xu --- crypto/krb5/selftest.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/krb5/selftest.c b/crypto/krb5/selftest.c index 2a81a6315a0d..4519c572d37e 100644 --- a/crypto/krb5/selftest.c +++ b/crypto/krb5/selftest.c @@ -152,6 +152,7 @@ static int krb5_test_one_prf(const struct krb5_prf_test *test) out: clear_buf(&result); + clear_buf(&prf); clear_buf(&octet); clear_buf(&key); return ret; From e85334656836c161ee9d87b52ea09776828e7bcd Mon Sep 17 00:00:00 2001 From: Vijay Sundar Selvamani Date: Thu, 10 Jul 2025 07:39:43 +0100 Subject: [PATCH 094/116] crypto: qat - add decompression service to telemetry QAT GEN6 devices offer decompression as an additional service. Update the telemetry ring pair service interface to support monitoring decompression operations. Co-developed-by: George Abraham P Signed-off-by: George Abraham P Signed-off-by: Vijay Sundar Selvamani Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c index f20ae7e35a0d..a32db273842a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c @@ -538,6 +538,9 @@ static void tl_print_rp_srv(struct adf_accel_dev *accel_dev, struct seq_file *s, case ASYM: seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, ADF_CFG_ASYM); break; + case DECOMP: + seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, ADF_CFG_DECOMP); + break; default: seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, TL_RP_SRV_UNKNOWN); break; From c6b012a26cf9aab56bef8e09f02818c3b79cfd95 Mon Sep 17 00:00:00 2001 From: Vijay Sundar Selvamani Date: Thu, 10 Jul 2025 07:39:44 +0100 Subject: [PATCH 095/116] crypto: qat - enable telemetry for GEN6 devices Enable telemetry for QAT GEN6 devices by defining the firmware data structures layouts, implementing the counters parsing logic and setting the required properties on the adf_tl_hw_data data structure. As for QAT GEN4, telemetry counters are exposed via debugfs using the interface described in Documentation/ABI/testing/debugfs-driver-qat_telemetry. 
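For example, on a GEN6 device the counters can be read the same way as
on GEN4 (the device address below is only a placeholder; see the ABI
document above for the exact semantics of the control values):

	echo 1 > /sys/kernel/debug/qat_6xxx_0000:6b:00.0/telemetry/control
	cat /sys/kernel/debug/qat_6xxx_0000:6b:00.0/telemetry/device_data
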
Co-developed-by: George Abraham P Signed-off-by: George Abraham P Signed-off-by: Vijay Sundar Selvamani Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 3 + drivers/crypto/intel/qat/qat_common/Makefile | 1 + .../crypto/intel/qat/qat_common/adf_gen6_tl.c | 146 +++++++++++++ .../crypto/intel/qat/qat_common/adf_gen6_tl.h | 198 ++++++++++++++++++ 4 files changed, 348 insertions(+) create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen6_tl.c create mode 100644 drivers/crypto/intel/qat/qat_common/adf_gen6_tl.h diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index ecef3dc28a91..d3f1034f33fb 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "adf_6xxx_hw_data.h" #include "icp_qat_fw_comp.h" @@ -861,12 +862,14 @@ void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data) hw_data->init_device = adf_init_device; hw_data->enable_pm = enable_pm; hw_data->services_supported = services_supported; + hw_data->num_rps = ADF_GEN6_ETR_MAX_BANKS; adf_gen6_init_hw_csr_ops(&hw_data->csr_ops); adf_gen6_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen6_init_dc_ops(&hw_data->dc_ops); adf_gen6_init_vf_mig_ops(&hw_data->vfmig_ops); adf_gen6_init_ras_ops(&hw_data->ras_ops); + adf_gen6_init_tl_data(&hw_data->tl_data); } void adf_clean_hw_data_6xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 34019d8637a5..89845754841b 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -50,6 +50,7 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_cnv_dbgfs.o \ adf_gen4_pm_debugfs.o \ adf_gen4_tl.o \ adf_gen6_pm_dbgfs.o \ + adf_gen6_tl.o \ adf_heartbeat_dbgfs.o \ adf_heartbeat.o \ adf_pm_dbgfs.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_tl.c b/drivers/crypto/intel/qat/qat_common/adf_gen6_tl.c new file mode 100644 index 000000000000..cf804f95838a --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_tl.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Intel Corporation. */ +#include + +#include "adf_gen6_tl.h" +#include "adf_telemetry.h" +#include "adf_tl_debugfs.h" +#include "icp_qat_fw_init_admin.h" + +#define ADF_GEN6_TL_DEV_REG_OFF(reg) ADF_TL_DEV_REG_OFF(reg, gen6) + +#define ADF_GEN6_TL_RP_REG_OFF(reg) ADF_TL_RP_REG_OFF(reg, gen6) + +#define ADF_GEN6_TL_SL_UTIL_COUNTER(_name) \ + ADF_TL_COUNTER("util_" #_name, ADF_TL_SIMPLE_COUNT, \ + ADF_TL_SLICE_REG_OFF(_name, reg_tm_slice_util, gen6)) + +#define ADF_GEN6_TL_SL_EXEC_COUNTER(_name) \ + ADF_TL_COUNTER("exec_" #_name, ADF_TL_SIMPLE_COUNT, \ + ADF_TL_SLICE_REG_OFF(_name, reg_tm_slice_exec_cnt, gen6)) + +#define SLICE_IDX(sl) offsetof(struct icp_qat_fw_init_admin_slice_cnt, sl##_cnt) + +/* Device level counters. */ +static const struct adf_tl_dbg_counter dev_counters[] = { + /* PCIe partial transactions. */ + ADF_TL_COUNTER(PCI_TRANS_CNT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_prt_trans_cnt)), + /* Max read latency[ns]. */ + ADF_TL_COUNTER(MAX_RD_LAT_NAME, ADF_TL_COUNTER_NS, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_rd_lat_max)), + /* Read latency average[ns]. 
*/ + ADF_TL_COUNTER_LATENCY(RD_LAT_ACC_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_rd_lat_acc), + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_rd_cmpl_cnt)), + /* Max "get to put" latency[ns]. */ + ADF_TL_COUNTER(MAX_LAT_NAME, ADF_TL_COUNTER_NS, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_gp_lat_max)), + /* "Get to put" latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(LAT_ACC_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_gp_lat_acc), + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_ae_put_cnt)), + /* PCIe write bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_IN_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_bw_in)), + /* PCIe read bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_OUT_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_bw_out)), + /* Page request latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(PAGE_REQ_LAT_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_at_page_req_lat_acc), + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_at_page_req_cnt)), + /* Page translation latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(AT_TRANS_LAT_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_at_trans_lat_acc), + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_at_trans_lat_cnt)), + /* Maximum uTLB used. */ + ADF_TL_COUNTER(AT_MAX_UTLB_USED_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_DEV_REG_OFF(reg_tl_at_max_utlb_used)), +}; + +/* Accelerator utilization counters */ +static const struct adf_tl_dbg_counter sl_util_counters[ADF_TL_SL_CNT_COUNT] = { + /* Compression accelerator utilization. */ + [SLICE_IDX(cpr)] = ADF_GEN6_TL_SL_UTIL_COUNTER(cnv), + /* Decompression accelerator utilization. */ + [SLICE_IDX(dcpr)] = ADF_GEN6_TL_SL_UTIL_COUNTER(dcprz), + /* PKE accelerator utilization. */ + [SLICE_IDX(pke)] = ADF_GEN6_TL_SL_UTIL_COUNTER(pke), + /* Wireless Authentication accelerator utilization. */ + [SLICE_IDX(wat)] = ADF_GEN6_TL_SL_UTIL_COUNTER(wat), + /* Wireless Cipher accelerator utilization. */ + [SLICE_IDX(wcp)] = ADF_GEN6_TL_SL_UTIL_COUNTER(wcp), + /* UCS accelerator utilization. */ + [SLICE_IDX(ucs)] = ADF_GEN6_TL_SL_UTIL_COUNTER(ucs), + /* Authentication accelerator utilization. */ + [SLICE_IDX(ath)] = ADF_GEN6_TL_SL_UTIL_COUNTER(ath), +}; + +/* Accelerator execution counters */ +static const struct adf_tl_dbg_counter sl_exec_counters[ADF_TL_SL_CNT_COUNT] = { + /* Compression accelerator execution count. */ + [SLICE_IDX(cpr)] = ADF_GEN6_TL_SL_EXEC_COUNTER(cnv), + /* Decompression accelerator execution count. */ + [SLICE_IDX(dcpr)] = ADF_GEN6_TL_SL_EXEC_COUNTER(dcprz), + /* PKE execution count. */ + [SLICE_IDX(pke)] = ADF_GEN6_TL_SL_EXEC_COUNTER(pke), + /* Wireless Authentication accelerator execution count. */ + [SLICE_IDX(wat)] = ADF_GEN6_TL_SL_EXEC_COUNTER(wat), + /* Wireless Cipher accelerator execution count. */ + [SLICE_IDX(wcp)] = ADF_GEN6_TL_SL_EXEC_COUNTER(wcp), + /* UCS accelerator execution count. */ + [SLICE_IDX(ucs)] = ADF_GEN6_TL_SL_EXEC_COUNTER(ucs), + /* Authentication accelerator execution count. */ + [SLICE_IDX(ath)] = ADF_GEN6_TL_SL_EXEC_COUNTER(ath), +}; + +/* Ring pair counters. */ +static const struct adf_tl_dbg_counter rp_counters[] = { + /* PCIe partial transactions. */ + ADF_TL_COUNTER(PCI_TRANS_CNT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_prt_trans_cnt)), + /* "Get to put" latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(LAT_ACC_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_gp_lat_acc), + ADF_GEN6_TL_RP_REG_OFF(reg_tl_ae_put_cnt)), + /* PCIe write bandwidth[Mbps]. 
*/ + ADF_TL_COUNTER(BW_IN_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_bw_in)), + /* PCIe read bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_OUT_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_bw_out)), + /* Message descriptor DevTLB hit rate. */ + ADF_TL_COUNTER(AT_GLOB_DTLB_HIT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_at_glob_devtlb_hit)), + /* Message descriptor DevTLB miss rate. */ + ADF_TL_COUNTER(AT_GLOB_DTLB_MISS_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_at_glob_devtlb_miss)), + /* Payload DevTLB hit rate. */ + ADF_TL_COUNTER(AT_PAYLD_DTLB_HIT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_at_payld_devtlb_hit)), + /* Payload DevTLB miss rate. */ + ADF_TL_COUNTER(AT_PAYLD_DTLB_MISS_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN6_TL_RP_REG_OFF(reg_tl_at_payld_devtlb_miss)), +}; + +void adf_gen6_init_tl_data(struct adf_tl_hw_data *tl_data) +{ + tl_data->layout_sz = ADF_GEN6_TL_LAYOUT_SZ; + tl_data->slice_reg_sz = ADF_GEN6_TL_SLICE_REG_SZ; + tl_data->rp_reg_sz = ADF_GEN6_TL_RP_REG_SZ; + tl_data->num_hbuff = ADF_GEN6_TL_NUM_HIST_BUFFS; + tl_data->max_rp = ADF_GEN6_TL_MAX_RP_NUM; + tl_data->msg_cnt_off = ADF_GEN6_TL_MSG_CNT_OFF; + tl_data->cpp_ns_per_cycle = ADF_GEN6_CPP_NS_PER_CYCLE; + tl_data->bw_units_to_bytes = ADF_GEN6_TL_BW_HW_UNITS_TO_BYTES; + + tl_data->dev_counters = dev_counters; + tl_data->num_dev_counters = ARRAY_SIZE(dev_counters); + tl_data->sl_util_counters = sl_util_counters; + tl_data->sl_exec_counters = sl_exec_counters; + tl_data->rp_counters = rp_counters; + tl_data->num_rp_counters = ARRAY_SIZE(rp_counters); + tl_data->max_sl_cnt = ADF_GEN6_TL_MAX_SLICES_PER_TYPE; +} +EXPORT_SYMBOL_GPL(adf_gen6_init_tl_data); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen6_tl.h b/drivers/crypto/intel/qat/qat_common/adf_gen6_tl.h new file mode 100644 index 000000000000..49db660b8eb9 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen6_tl.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2025 Intel Corporation. */ +#ifndef ADF_GEN6_TL_H +#define ADF_GEN6_TL_H + +#include + +struct adf_tl_hw_data; + +/* Computation constants. */ +#define ADF_GEN6_CPP_NS_PER_CYCLE 2 +#define ADF_GEN6_TL_BW_HW_UNITS_TO_BYTES 64 + +/* Maximum aggregation time. Value is in milliseconds. */ +#define ADF_GEN6_TL_MAX_AGGR_TIME_MS 4000 +/* Number of buffers to store historic values. */ +#define ADF_GEN6_TL_NUM_HIST_BUFFS \ + (ADF_GEN6_TL_MAX_AGGR_TIME_MS / ADF_TL_DATA_WR_INTERVAL_MS) + +/* Max number of HW resources of one type */ +#define ADF_GEN6_TL_MAX_SLICES_PER_TYPE 32 +#define MAX_ATH_SL_COUNT 7 +#define MAX_CNV_SL_COUNT 2 +#define MAX_DCPRZ_SL_COUNT 2 +#define MAX_PKE_SL_COUNT 32 +#define MAX_UCS_SL_COUNT 4 +#define MAX_WAT_SL_COUNT 5 +#define MAX_WCP_SL_COUNT 5 + +#define MAX_ATH_CMDQ_COUNT 14 +#define MAX_CNV_CMDQ_COUNT 6 +#define MAX_DCPRZ_CMDQ_COUNT 6 +#define MAX_PKE_CMDQ_COUNT 32 +#define MAX_UCS_CMDQ_COUNT 12 +#define MAX_WAT_CMDQ_COUNT 35 +#define MAX_WCP_CMDQ_COUNT 35 + +/* Max number of simultaneously monitored ring pairs. */ +#define ADF_GEN6_TL_MAX_RP_NUM 4 + +/** + * struct adf_gen6_tl_slice_data_regs - HW slice data as populated by FW. + * @reg_tm_slice_exec_cnt: Slice execution count. + * @reg_tm_slice_util: Slice utilization. 
+ */
+struct adf_gen6_tl_slice_data_regs {
+	__u32 reg_tm_slice_exec_cnt;
+	__u32 reg_tm_slice_util;
+};
+
+#define ADF_GEN6_TL_SLICE_REG_SZ sizeof(struct adf_gen6_tl_slice_data_regs)
+
+/**
+ * struct adf_gen6_tl_cmdq_data_regs - HW CMDQ data as populated by FW.
+ * @reg_tm_cmdq_wait_cnt: CMDQ wait count.
+ * @reg_tm_cmdq_exec_cnt: CMDQ execution count.
+ * @reg_tm_cmdq_drain_cnt: CMDQ drain count.
+ */
+struct adf_gen6_tl_cmdq_data_regs {
+	__u32 reg_tm_cmdq_wait_cnt;
+	__u32 reg_tm_cmdq_exec_cnt;
+	__u32 reg_tm_cmdq_drain_cnt;
+	__u32 reserved;
+};
+
+#define ADF_GEN6_TL_CMDQ_REG_SZ sizeof(struct adf_gen6_tl_cmdq_data_regs)
+
+/**
+ * struct adf_gen6_tl_device_data_regs - This structure stores device telemetry
+ * counter values as they are being populated periodically by the device.
+ * @reg_tl_rd_lat_acc: read latency accumulator
+ * @reg_tl_gp_lat_acc: "get to put" latency accumulator
+ * @reg_tl_at_page_req_lat_acc: AT/DevTLB page request latency accumulator
+ * @reg_tl_at_trans_lat_acc: DevTLB transaction latency accumulator
+ * @reg_tl_re_acc: accumulated ring empty time
+ * @reg_tl_prt_trans_cnt: PCIe partial transactions
+ * @reg_tl_rd_lat_max: maximum logged read latency
+ * @reg_tl_rd_cmpl_cnt: read requests completed count
+ * @reg_tl_gp_lat_max: maximum logged get to put latency
+ * @reg_tl_ae_put_cnt: Accelerator Engine put counts across all rings
+ * @reg_tl_bw_in: PCIe write bandwidth
+ * @reg_tl_bw_out: PCIe read bandwidth
+ * @reg_tl_at_page_req_cnt: DevTLB page requests count
+ * @reg_tl_at_trans_lat_cnt: DevTLB transaction latency samples count
+ * @reg_tl_at_max_utlb_used: maximum uTLB used
+ * @reg_tl_re_cnt: ring empty time samples count
+ * @reserved: reserved
+ * @ath_slices: array of Authentication slices utilization registers
+ * @cnv_slices: array of Compression slices utilization registers
+ * @dcprz_slices: array of Decompression slices utilization registers
+ * @pke_slices: array of PKE slices utilization registers
+ * @ucs_slices: array of UCS slices utilization registers
+ * @wat_slices: array of Wireless Authentication slices utilization registers
+ * @wcp_slices: array of Wireless Cipher slices utilization registers
+ * @ath_cmdq: array of Authentication cmdq telemetry registers
+ * @cnv_cmdq: array of Compression cmdq telemetry registers
+ * @dcprz_cmdq: array of Decompression cmdq telemetry registers
+ * @pke_cmdq: array of PKE cmdq telemetry registers
+ * @ucs_cmdq: array of UCS cmdq telemetry registers
+ * @wat_cmdq: array of Wireless Authentication cmdq telemetry registers
+ * @wcp_cmdq: array of Wireless Cipher cmdq telemetry registers
+ */
+struct adf_gen6_tl_device_data_regs {
+	__u64 reg_tl_rd_lat_acc;
+	__u64 reg_tl_gp_lat_acc;
+	__u64 reg_tl_at_page_req_lat_acc;
+	__u64 reg_tl_at_trans_lat_acc;
+	__u64 reg_tl_re_acc;
+	__u32 reg_tl_prt_trans_cnt;
+	__u32 reg_tl_rd_lat_max;
+	__u32 reg_tl_rd_cmpl_cnt;
+	__u32 reg_tl_gp_lat_max;
+	__u32 reg_tl_ae_put_cnt;
+	__u32 reg_tl_bw_in;
+	__u32 reg_tl_bw_out;
+	__u32 reg_tl_at_page_req_cnt;
+	__u32 reg_tl_at_trans_lat_cnt;
+	__u32 reg_tl_at_max_utlb_used;
+	__u32 reg_tl_re_cnt;
+	__u32 reserved;
+	struct adf_gen6_tl_slice_data_regs ath_slices[MAX_ATH_SL_COUNT];
+	struct adf_gen6_tl_slice_data_regs cnv_slices[MAX_CNV_SL_COUNT];
+	struct adf_gen6_tl_slice_data_regs dcprz_slices[MAX_DCPRZ_SL_COUNT];
+	struct adf_gen6_tl_slice_data_regs pke_slices[MAX_PKE_SL_COUNT];
+	struct adf_gen6_tl_slice_data_regs ucs_slices[MAX_UCS_SL_COUNT];
+	struct adf_gen6_tl_slice_data_regs wat_slices[MAX_WAT_SL_COUNT];
+	struct adf_gen6_tl_slice_data_regs wcp_slices[MAX_WCP_SL_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs ath_cmdq[MAX_ATH_CMDQ_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs cnv_cmdq[MAX_CNV_CMDQ_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs dcprz_cmdq[MAX_DCPRZ_CMDQ_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs pke_cmdq[MAX_PKE_CMDQ_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs ucs_cmdq[MAX_UCS_CMDQ_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs wat_cmdq[MAX_WAT_CMDQ_COUNT];
+	struct adf_gen6_tl_cmdq_data_regs wcp_cmdq[MAX_WCP_CMDQ_COUNT];
+};
+
+/**
+ * struct adf_gen6_tl_ring_pair_data_regs - This structure stores ring pair
+ * telemetry counter values as they are being populated periodically by device.
+ * @reg_tl_gp_lat_acc: get-put latency accumulator
+ * @reg_tl_re_acc: accumulated ring empty time
+ * @reg_tl_prt_trans_cnt: PCIe partial transactions
+ * @reg_tl_ae_put_cnt: Accelerator Engine put counts across all rings
+ * @reg_tl_bw_in: PCIe write bandwidth
+ * @reg_tl_bw_out: PCIe read bandwidth
+ * @reg_tl_at_glob_devtlb_hit: Message descriptor DevTLB hit rate
+ * @reg_tl_at_glob_devtlb_miss: Message descriptor DevTLB miss rate
+ * @reg_tl_at_payld_devtlb_hit: Payload DevTLB hit rate
+ * @reg_tl_at_payld_devtlb_miss: Payload DevTLB miss rate
+ * @reg_tl_re_cnt: ring empty time samples count
+ * @reserved1: reserved
+ */
+struct adf_gen6_tl_ring_pair_data_regs {
+	__u64 reg_tl_gp_lat_acc;
+	__u64 reg_tl_re_acc;
+	__u32 reg_tl_prt_trans_cnt;
+	__u32 reg_tl_ae_put_cnt;
+	__u32 reg_tl_bw_in;
+	__u32 reg_tl_bw_out;
+	__u32 reg_tl_at_glob_devtlb_hit;
+	__u32 reg_tl_at_glob_devtlb_miss;
+	__u32 reg_tl_at_payld_devtlb_hit;
+	__u32 reg_tl_at_payld_devtlb_miss;
+	__u32 reg_tl_re_cnt;
+	__u32 reserved1;
+};
+
+#define ADF_GEN6_TL_RP_REG_SZ sizeof(struct adf_gen6_tl_ring_pair_data_regs)
+
+/**
+ * struct adf_gen6_tl_layout - This structure represents the entire telemetry
+ * counters data: Device + 4 Ring Pairs as they are being populated periodically
+ * by device.
+ * @tl_device_data_regs: structure of device telemetry registers
+ * @tl_ring_pairs_data_regs: array of ring pairs telemetry registers
+ * @reg_tl_msg_cnt: telemetry message counter
+ * @reserved: reserved
+ */
+struct adf_gen6_tl_layout {
+	struct adf_gen6_tl_device_data_regs tl_device_data_regs;
+	struct adf_gen6_tl_ring_pair_data_regs
+			tl_ring_pairs_data_regs[ADF_GEN6_TL_MAX_RP_NUM];
+	__u32 reg_tl_msg_cnt;
+	__u32 reserved;
+};
+
+#define ADF_GEN6_TL_LAYOUT_SZ	sizeof(struct adf_gen6_tl_layout)
+#define ADF_GEN6_TL_MSG_CNT_OFF \
+	offsetof(struct adf_gen6_tl_layout, reg_tl_msg_cnt)
+
+#ifdef CONFIG_DEBUG_FS
+void adf_gen6_init_tl_data(struct adf_tl_hw_data *tl_data);
+#else
+static inline void adf_gen6_init_tl_data(struct adf_tl_hw_data *tl_data)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+#endif /* ADF_GEN6_TL_H */

From f9c4923ca891c83ffad5f9a9fce666a5fa24a7f4 Mon Sep 17 00:00:00 2001
From: Vijay Sundar Selvamani
Date: Thu, 10 Jul 2025 07:39:45 +0100
Subject: [PATCH 096/116] Documentation: qat: update debugfs-driver-qat_telemetry for GEN6 devices

Expand the telemetry documentation to support QAT GEN6 devices.
Introduce new parameters to capture compression and decompression slice
utilization and execution counts.

Co-developed-by: George Abraham P
Signed-off-by: George Abraham P
Signed-off-by: Vijay Sundar Selvamani
Signed-off-by: Suman Kumar Chakraborty
Reviewed-by: Giovanni Cabiddu
Signed-off-by: Herbert Xu
---
 Documentation/ABI/testing/debugfs-driver-qat_telemetry | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/ABI/testing/debugfs-driver-qat_telemetry b/Documentation/ABI/testing/debugfs-driver-qat_telemetry
index eacee2072088..0dfd8d97e169 100644
--- a/Documentation/ABI/testing/debugfs-driver-qat_telemetry
+++ b/Documentation/ABI/testing/debugfs-driver-qat_telemetry
@@ -32,7 +32,7 @@ Description: (RW) Enables/disables the reporting of telemetry metrics.
 
 	    echo 0 > /sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry/control
 
-	This attribute is only available for qat_4xxx devices.
+	This attribute is only available for qat_4xxx and qat_6xxx devices.
 
 What:		/sys/kernel/debug/qat_<device>_<BDF>/telemetry/device_data
 Date:		March 2024
@@ -67,6 +67,10 @@ Description: (RO) Reports device telemetry counters.
 	    exec_xlt	execution count of Translator slice N
 	    util_dcpr	utilization of Decompression slice N [%]
 	    exec_dcpr	execution count of Decompression slice N
+	    util_cnv	utilization of Compression and verify slice N [%]
+	    exec_cnv	execution count of Compression and verify slice N
+	    util_dcprz	utilization of Decompression slice N [%]
+	    exec_dcprz	execution count of Decompression slice N
 	    util_pke	utilization of PKE N [%]
 	    exec_pke	execution count of PKE N
 	    util_ucs	utilization of UCS slice N [%]
@@ -100,7 +104,7 @@ Description: (RO) Reports device telemetry counters.
 	    If a device lacks of a specific accelerator, the corresponding
 	    attribute is not reported.
 
-	This attribute is only available for qat_4xxx devices.
+	This attribute is only available for qat_4xxx and qat_6xxx devices.
 
 What:		/sys/kernel/debug/qat_<device>_<BDF>/telemetry/rp_<C>_data
 Date:		March 2024
@@ -225,4 +229,4 @@ Description: (RW) Selects up to 4 Ring Pairs (RP) to monitor, one per file,
 	    ``rp2srv`` from sysfs.
 	    See Documentation/ABI/testing/sysfs-driver-qat for details.
 
-	This attribute is only available for qat_4xxx devices.
+	This attribute is only available for qat_4xxx and qat_6xxx devices.

From f0ae287c50455f7be0d8dd45a803d403c7aa4d2e Mon Sep 17 00:00:00 2001
From: Wenkai Lin
Date: Thu, 10 Jul 2025 20:24:57 +0800
Subject: [PATCH 097/116] crypto: hisilicon/sec2 - implement full backlog mode for sec

Introduce a hierarchical backlog mechanism to cache user data in
high-throughput encryption/decryption scenarios. This addresses packet
loss when the hardware queues overflow during peak loads.

First, use sec_alloc_req_id to obtain an exclusive resource from the
pre-allocated resource pool of each queue; if no resource can be
allocated, perform the DMA map operation on the request memory instead.
When the task is ready, attempt to send it to the hardware. If the
hardware queue is already full, cache the request in the backlog list,
return -EBUSY to the upper layer and instruct the packet-sending thread
to pause transmission. When the hardware completes a task, it triggers
the sec callback function, which resends requests from the backlog list
and wakes up the sending thread until the hardware queue becomes fully
occupied again. In addition, exceptions such as a hardware reset while
packets are in flight are handled by switching to software computation
and releasing the occupied resources.
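The heart of the new send path is a check/lock/re-check sequence;
excerpted (lightly annotated) from the patch below:

	static int sec_alg_send_message_maybacklog(struct sec_req *req)
	{
		int ret;

		/* Fast path: nothing backlogged and the hardware ring has room. */
		ret = sec_alg_try_enqueue(req);
		if (ret != -EBUSY)
			return ret;

		/* Re-check under the lock so a concurrent completion is not missed. */
		spin_lock_bh(&req->backlog->lock);
		ret = sec_alg_try_enqueue(req);
		if (ret == -EBUSY)
			list_add_tail(&req->list, &req->backlog->list);
		spin_unlock_bh(&req->backlog->lock);

		return ret;
	}
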
Signed-off-by: Wenkai Lin Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/sec2/sec.h | 63 ++- drivers/crypto/hisilicon/sec2/sec_crypto.c | 582 ++++++++++++++------- 2 files changed, 461 insertions(+), 184 deletions(-) diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index 703920b49c7c..81d0beda93b2 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -7,6 +7,12 @@ #include #include "sec_crypto.h" +#define SEC_PBUF_SZ 512 +#define SEC_MAX_MAC_LEN 64 +#define SEC_IV_SIZE 24 +#define SEC_SGE_NR_NUM 4 +#define SEC_SGL_ALIGN_SIZE 64 + /* Algorithm resource per hardware SEC queue */ struct sec_alg_res { u8 *pbuf; @@ -20,6 +26,40 @@ struct sec_alg_res { u16 depth; }; +struct sec_hw_sge { + dma_addr_t buf; + void *page_ctrl; + __le32 len; + __le32 pad; + __le32 pad0; + __le32 pad1; +}; + +struct sec_hw_sgl { + dma_addr_t next_dma; + __le16 entry_sum_in_chain; + __le16 entry_sum_in_sgl; + __le16 entry_length_in_sgl; + __le16 pad0; + __le64 pad1[5]; + struct sec_hw_sgl *next; + struct sec_hw_sge sge_entries[SEC_SGE_NR_NUM]; +} __aligned(SEC_SGL_ALIGN_SIZE); + +struct sec_src_dst_buf { + struct sec_hw_sgl in; + struct sec_hw_sgl out; +}; + +struct sec_request_buf { + union { + struct sec_src_dst_buf data_buf; + __u8 pbuf[SEC_PBUF_SZ]; + }; + dma_addr_t in_dma; + dma_addr_t out_dma; +}; + /* Cipher request of SEC private */ struct sec_cipher_req { struct hisi_acc_hw_sgl *c_out; @@ -29,6 +69,7 @@ struct sec_cipher_req { struct skcipher_request *sk_req; u32 c_len; bool encrypt; + __u8 c_ivin_buf[SEC_IV_SIZE]; }; struct sec_aead_req { @@ -37,6 +78,13 @@ struct sec_aead_req { u8 *a_ivin; dma_addr_t a_ivin_dma; struct aead_request *aead_req; + __u8 a_ivin_buf[SEC_IV_SIZE]; + __u8 out_mac_buf[SEC_MAX_MAC_LEN]; +}; + +struct sec_instance_backlog { + struct list_head list; + spinlock_t lock; }; /* SEC request of Crypto */ @@ -55,15 +103,17 @@ struct sec_req { dma_addr_t in_dma; struct sec_cipher_req c_req; struct sec_aead_req aead_req; - struct list_head backlog_head; + struct crypto_async_request *base; int err_type; int req_id; u32 flag; - /* Status of the SEC request */ - bool fake_busy; bool use_pbuf; + + struct list_head list; + struct sec_instance_backlog *backlog; + struct sec_request_buf buf; }; /** @@ -119,9 +169,11 @@ struct sec_qp_ctx { struct sec_alg_res *res; struct sec_ctx *ctx; spinlock_t req_lock; - struct list_head backlog; + spinlock_t id_lock; struct hisi_acc_sgl_pool *c_in_pool; struct hisi_acc_sgl_pool *c_out_pool; + struct sec_instance_backlog backlog; + u16 send_head; }; enum sec_alg_type { @@ -139,9 +191,6 @@ struct sec_ctx { /* Half queues for encipher, and half for decipher */ u32 hlf_q_num; - /* Threshold for fake busy, trigger to return -EBUSY to user */ - u32 fake_req_limit; - /* Current cyclic index to select a queue for encipher */ atomic_t enc_qcyclic; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 7d04e770a8c2..d044ded0f290 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -67,7 +67,6 @@ #define SEC_MAX_CCM_AAD_LEN 65279 #define SEC_TOTAL_MAC_SZ(depth) (SEC_MAX_MAC_LEN * (depth)) -#define SEC_PBUF_SZ 512 #define SEC_PBUF_IV_OFFSET SEC_PBUF_SZ #define SEC_PBUF_MAC_OFFSET (SEC_PBUF_SZ + SEC_IV_SIZE) #define SEC_PBUF_PKG (SEC_PBUF_SZ + SEC_IV_SIZE + \ @@ -102,6 +101,8 @@ #define IV_LAST_BYTE_MASK 0xFF #define IV_CTR_INIT 0x1 #define IV_BYTE_OFFSET 
0x8 +#define SEC_GCM_MIN_AUTH_SZ 0x8 +#define SEC_RETRY_MAX_CNT 5U static DEFINE_MUTEX(sec_algs_lock); static unsigned int sec_available_devs; @@ -116,40 +117,19 @@ struct sec_aead { struct aead_alg alg; }; -/* Get an en/de-cipher queue cyclically to balance load over queues of TFM */ -static inline u32 sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) -{ - if (req->c_req.encrypt) - return (u32)atomic_inc_return(&ctx->enc_qcyclic) % - ctx->hlf_q_num; - - return (u32)atomic_inc_return(&ctx->dec_qcyclic) % ctx->hlf_q_num + - ctx->hlf_q_num; -} - -static inline void sec_free_queue_id(struct sec_ctx *ctx, struct sec_req *req) -{ - if (req->c_req.encrypt) - atomic_dec(&ctx->enc_qcyclic); - else - atomic_dec(&ctx->dec_qcyclic); -} +static int sec_aead_soft_crypto(struct sec_ctx *ctx, + struct aead_request *aead_req, + bool encrypt); +static int sec_skcipher_soft_crypto(struct sec_ctx *ctx, + struct skcipher_request *sreq, bool encrypt); static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx) { int req_id; - spin_lock_bh(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->id_lock); req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL, 0, qp_ctx->qp->sq_depth, GFP_ATOMIC); - spin_unlock_bh(&qp_ctx->req_lock); - if (unlikely(req_id < 0)) { - dev_err(req->ctx->dev, "alloc req id fail!\n"); - return req_id; - } - - req->qp_ctx = qp_ctx; - qp_ctx->req_list[req_id] = req; - + spin_unlock_bh(&qp_ctx->id_lock); return req_id; } @@ -163,12 +143,9 @@ static void sec_free_req_id(struct sec_req *req) return; } - qp_ctx->req_list[req_id] = NULL; - req->qp_ctx = NULL; - - spin_lock_bh(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->id_lock); idr_remove(&qp_ctx->req_idr, req_id); - spin_unlock_bh(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->id_lock); } static u8 pre_parse_finished_bd(struct bd_status *status, void *resp) @@ -229,6 +206,90 @@ static int sec_cb_status_check(struct sec_req *req, return 0; } +static int qp_send_message(struct sec_req *req) +{ + struct sec_qp_ctx *qp_ctx = req->qp_ctx; + int ret; + + if (atomic_read(&qp_ctx->qp->qp_status.used) == qp_ctx->qp->sq_depth - 1) + return -EBUSY; + + spin_lock_bh(&qp_ctx->req_lock); + if (atomic_read(&qp_ctx->qp->qp_status.used) == qp_ctx->qp->sq_depth - 1) { + spin_unlock_bh(&qp_ctx->req_lock); + return -EBUSY; + } + + if (qp_ctx->ctx->type_supported == SEC_BD_TYPE2) { + req->sec_sqe.type2.tag = cpu_to_le16((u16)qp_ctx->send_head); + qp_ctx->req_list[qp_ctx->send_head] = req; + } + + ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe); + if (ret) { + spin_unlock_bh(&qp_ctx->req_lock); + return ret; + } + if (qp_ctx->ctx->type_supported == SEC_BD_TYPE2) + qp_ctx->send_head = (qp_ctx->send_head + 1) % qp_ctx->qp->sq_depth; + + spin_unlock_bh(&qp_ctx->req_lock); + + atomic64_inc(&req->ctx->sec->debug.dfx.send_cnt); + return -EINPROGRESS; +} + +static void sec_alg_send_backlog_soft(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) +{ + struct sec_req *req, *tmp; + int ret; + + list_for_each_entry_safe(req, tmp, &qp_ctx->backlog.list, list) { + list_del(&req->list); + ctx->req_op->buf_unmap(ctx, req); + if (req->req_id >= 0) + sec_free_req_id(req); + + if (ctx->alg_type == SEC_AEAD) + ret = sec_aead_soft_crypto(ctx, req->aead_req.aead_req, + req->c_req.encrypt); + else + ret = sec_skcipher_soft_crypto(ctx, req->c_req.sk_req, + req->c_req.encrypt); + + /* Wake up the busy thread first, then return the errno. 
*/ + crypto_request_complete(req->base, -EINPROGRESS); + crypto_request_complete(req->base, ret); + } +} + +static void sec_alg_send_backlog(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) +{ + struct sec_req *req, *tmp; + int ret; + + spin_lock_bh(&qp_ctx->backlog.lock); + list_for_each_entry_safe(req, tmp, &qp_ctx->backlog.list, list) { + ret = qp_send_message(req); + switch (ret) { + case -EINPROGRESS: + list_del(&req->list); + crypto_request_complete(req->base, -EINPROGRESS); + break; + case -EBUSY: + /* Device is busy and stop send any request. */ + goto unlock; + default: + /* Release memory resources and send all requests through software. */ + sec_alg_send_backlog_soft(ctx, qp_ctx); + goto unlock; + } + } + +unlock: + spin_unlock_bh(&qp_ctx->backlog.lock); +} + static void sec_req_cb(struct hisi_qp *qp, void *resp) { struct sec_qp_ctx *qp_ctx = qp->qp_ctx; @@ -273,40 +334,54 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp) ctx->req_op->callback(ctx, req, err); } -static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) +static int sec_alg_send_message_retry(struct sec_req *req) { - struct sec_qp_ctx *qp_ctx = req->qp_ctx; + int ctr = 0; int ret; - if (ctx->fake_req_limit <= - atomic_read(&qp_ctx->qp->qp_status.used) && - !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)) - return -EBUSY; - - spin_lock_bh(&qp_ctx->req_lock); - ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe); - if (ctx->fake_req_limit <= - atomic_read(&qp_ctx->qp->qp_status.used) && !ret) { - list_add_tail(&req->backlog_head, &qp_ctx->backlog); - atomic64_inc(&ctx->sec->debug.dfx.send_cnt); - atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt); - spin_unlock_bh(&qp_ctx->req_lock); - return -EBUSY; - } - spin_unlock_bh(&qp_ctx->req_lock); - - if (unlikely(ret == -EBUSY)) - return -ENOBUFS; - - if (likely(!ret)) { - ret = -EINPROGRESS; - atomic64_inc(&ctx->sec->debug.dfx.send_cnt); - } + do { + ret = qp_send_message(req); + } while (ret == -EBUSY && ctr++ < SEC_RETRY_MAX_CNT); return ret; } -/* Get DMA memory resources */ +static int sec_alg_try_enqueue(struct sec_req *req) +{ + /* Check if any request is already backlogged */ + if (!list_empty(&req->backlog->list)) + return -EBUSY; + + /* Try to enqueue to HW ring */ + return qp_send_message(req); +} + + +static int sec_alg_send_message_maybacklog(struct sec_req *req) +{ + int ret; + + ret = sec_alg_try_enqueue(req); + if (ret != -EBUSY) + return ret; + + spin_lock_bh(&req->backlog->lock); + ret = sec_alg_try_enqueue(req); + if (ret == -EBUSY) + list_add_tail(&req->list, &req->backlog->list); + spin_unlock_bh(&req->backlog->lock); + + return ret; +} + +static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) +{ + if (req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG) + return sec_alg_send_message_maybacklog(req); + + return sec_alg_send_message_retry(req); +} + static int sec_alloc_civ_resource(struct device *dev, struct sec_alg_res *res) { u16 q_depth = res->depth; @@ -558,7 +633,10 @@ static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id) spin_lock_init(&qp_ctx->req_lock); idr_init(&qp_ctx->req_idr); - INIT_LIST_HEAD(&qp_ctx->backlog); + spin_lock_init(&qp_ctx->backlog.lock); + spin_lock_init(&qp_ctx->id_lock); + INIT_LIST_HEAD(&qp_ctx->backlog.list); + qp_ctx->send_head = 0; ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx); if (ret) @@ -602,9 +680,6 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) ctx->hlf_q_num = sec->ctx_q_num >> 1; ctx->pbuf_supported = ctx->sec->iommu_used; - - /* Half of queue depth is taken as fake requests limit in the queue. 
*/ - ctx->fake_req_limit = ctx->qps[0]->sq_depth >> 1; ctx->qp_ctx = kcalloc(sec->ctx_q_num, sizeof(struct sec_qp_ctx), GFP_KERNEL); if (!ctx->qp_ctx) { @@ -706,7 +781,7 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm) int ret; ctx->alg_type = SEC_SKCIPHER; - crypto_skcipher_set_reqsize(tfm, sizeof(struct sec_req)); + crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct sec_req)); ctx->c_ctx.ivsize = crypto_skcipher_ivsize(tfm); if (ctx->c_ctx.ivsize > SEC_IV_SIZE) { pr_err("get error skcipher iv size!\n"); @@ -883,24 +958,25 @@ GEN_SEC_SETKEY_FUNC(sm4_ctr, SEC_CALG_SM4, SEC_CMODE_CTR) static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src) { - struct sec_aead_req *a_req = &req->aead_req; - struct aead_request *aead_req = a_req->aead_req; + struct aead_request *aead_req = req->aead_req.aead_req; struct sec_cipher_req *c_req = &req->c_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct sec_request_buf *buf = &req->buf; struct device *dev = ctx->dev; int copy_size, pbuf_length; int req_id = req->req_id; struct crypto_aead *tfm; + u8 *mac_offset, *pbuf; size_t authsize; - u8 *mac_offset; if (ctx->alg_type == SEC_AEAD) copy_size = aead_req->cryptlen + aead_req->assoclen; else copy_size = c_req->c_len; - pbuf_length = sg_copy_to_buffer(src, sg_nents(src), - qp_ctx->res[req_id].pbuf, copy_size); + + pbuf = req->req_id < 0 ? buf->pbuf : qp_ctx->res[req_id].pbuf; + pbuf_length = sg_copy_to_buffer(src, sg_nents(src), pbuf, copy_size); if (unlikely(pbuf_length != copy_size)) { dev_err(dev, "copy src data to pbuf error!\n"); return -EINVAL; @@ -908,8 +984,17 @@ static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req, if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) { tfm = crypto_aead_reqtfm(aead_req); authsize = crypto_aead_authsize(tfm); - mac_offset = qp_ctx->res[req_id].pbuf + copy_size - authsize; - memcpy(a_req->out_mac, mac_offset, authsize); + mac_offset = pbuf + copy_size - authsize; + memcpy(req->aead_req.out_mac, mac_offset, authsize); + } + + if (req->req_id < 0) { + buf->in_dma = dma_map_single(dev, buf->pbuf, SEC_PBUF_SZ, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(dev, buf->in_dma))) + return -ENOMEM; + + buf->out_dma = buf->in_dma; + return 0; } req->in_dma = qp_ctx->res[req_id].pbuf_dma; @@ -924,6 +1009,7 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, struct aead_request *aead_req = req->aead_req.aead_req; struct sec_cipher_req *c_req = &req->c_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; + struct sec_request_buf *buf = &req->buf; int copy_size, pbuf_length; int req_id = req->req_id; @@ -932,10 +1018,16 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req, else copy_size = c_req->c_len; - pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), - qp_ctx->res[req_id].pbuf, copy_size); + if (req->req_id < 0) + pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), buf->pbuf, copy_size); + else + pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst), qp_ctx->res[req_id].pbuf, + copy_size); if (unlikely(pbuf_length != copy_size)) dev_err(ctx->dev, "copy pbuf data to dst error!\n"); + + if (req->req_id < 0) + dma_unmap_single(ctx->dev, buf->in_dma, SEC_PBUF_SZ, DMA_BIDIRECTIONAL); } static int sec_aead_mac_init(struct sec_aead_req *req) @@ -957,8 +1049,88 @@ static int sec_aead_mac_init(struct sec_aead_req *req) return 0; } -static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, - struct scatterlist *src, struct scatterlist *dst) +static void 
fill_sg_to_hw_sge(struct scatterlist *sgl, struct sec_hw_sge *hw_sge) +{ + hw_sge->buf = sg_dma_address(sgl); + hw_sge->len = cpu_to_le32(sg_dma_len(sgl)); + hw_sge->page_ctrl = sg_virt(sgl); +} + +static int sec_cipher_to_hw_sgl(struct device *dev, struct scatterlist *src, + struct sec_hw_sgl *src_in, dma_addr_t *hw_sgl_dma, + int dma_dir) +{ + struct sec_hw_sge *curr_hw_sge = src_in->sge_entries; + u32 i, sg_n, sg_n_mapped; + struct scatterlist *sg; + u32 sge_var = 0; + + sg_n = sg_nents(src); + sg_n_mapped = dma_map_sg(dev, src, sg_n, dma_dir); + if (unlikely(!sg_n_mapped)) { + dev_err(dev, "dma mapping for SG error!\n"); + return -EINVAL; + } else if (unlikely(sg_n_mapped > SEC_SGE_NR_NUM)) { + dev_err(dev, "the number of entries in input scatterlist error!\n"); + dma_unmap_sg(dev, src, sg_n, dma_dir); + return -EINVAL; + } + + for_each_sg(src, sg, sg_n_mapped, i) { + fill_sg_to_hw_sge(sg, curr_hw_sge); + curr_hw_sge++; + sge_var++; + } + + src_in->entry_sum_in_sgl = cpu_to_le16(sge_var); + src_in->entry_sum_in_chain = cpu_to_le16(SEC_SGE_NR_NUM); + src_in->entry_length_in_sgl = cpu_to_le16(SEC_SGE_NR_NUM); + *hw_sgl_dma = dma_map_single(dev, src_in, sizeof(struct sec_hw_sgl), dma_dir); + if (unlikely(dma_mapping_error(dev, *hw_sgl_dma))) { + dma_unmap_sg(dev, src, sg_n, dma_dir); + return -ENOMEM; + } + + return 0; +} + +static void sec_cipher_put_hw_sgl(struct device *dev, struct scatterlist *src, + dma_addr_t src_in, int dma_dir) +{ + dma_unmap_single(dev, src_in, sizeof(struct sec_hw_sgl), dma_dir); + dma_unmap_sg(dev, src, sg_nents(src), dma_dir); +} + +static int sec_cipher_map_sgl(struct device *dev, struct sec_req *req, + struct scatterlist *src, struct scatterlist *dst) +{ + struct sec_hw_sgl *src_in = &req->buf.data_buf.in; + struct sec_hw_sgl *dst_out = &req->buf.data_buf.out; + int ret; + + if (dst == src) { + ret = sec_cipher_to_hw_sgl(dev, src, src_in, &req->buf.in_dma, + DMA_BIDIRECTIONAL); + req->buf.out_dma = req->buf.in_dma; + return ret; + } + + ret = sec_cipher_to_hw_sgl(dev, src, src_in, &req->buf.in_dma, DMA_TO_DEVICE); + if (unlikely(ret)) + return ret; + + ret = sec_cipher_to_hw_sgl(dev, dst, dst_out, &req->buf.out_dma, + DMA_FROM_DEVICE); + if (unlikely(ret)) { + sec_cipher_put_hw_sgl(dev, src, req->buf.in_dma, DMA_TO_DEVICE); + return ret; + } + + return 0; +} + +static int sec_cipher_map_inner(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *src, struct scatterlist *dst) { struct sec_cipher_req *c_req = &req->c_req; struct sec_aead_req *a_req = &req->aead_req; @@ -978,10 +1150,9 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, a_req->out_mac_dma = res->pbuf_dma + SEC_PBUF_MAC_OFFSET; } - ret = sec_cipher_pbuf_map(ctx, req, src); - - return ret; + return sec_cipher_pbuf_map(ctx, req, src); } + c_req->c_ivin = res->c_ivin; c_req->c_ivin_dma = res->c_ivin_dma; if (ctx->alg_type == SEC_AEAD) { @@ -1030,22 +1201,109 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, return 0; } +static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req, + struct scatterlist *src, struct scatterlist *dst) +{ + struct sec_aead_req *a_req = &req->aead_req; + struct sec_cipher_req *c_req = &req->c_req; + bool is_aead = (ctx->alg_type == SEC_AEAD); + struct device *dev = ctx->dev; + int ret = -ENOMEM; + + if (req->req_id >= 0) + return sec_cipher_map_inner(ctx, req, src, dst); + + c_req->c_ivin = c_req->c_ivin_buf; + c_req->c_ivin_dma = dma_map_single(dev, c_req->c_ivin, + SEC_IV_SIZE, DMA_TO_DEVICE); + if 
(unlikely(dma_mapping_error(dev, c_req->c_ivin_dma))) + return -ENOMEM; + + if (is_aead) { + a_req->a_ivin = a_req->a_ivin_buf; + a_req->out_mac = a_req->out_mac_buf; + a_req->a_ivin_dma = dma_map_single(dev, a_req->a_ivin, + SEC_IV_SIZE, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, a_req->a_ivin_dma))) + goto free_c_ivin_dma; + + a_req->out_mac_dma = dma_map_single(dev, a_req->out_mac, + SEC_MAX_MAC_LEN, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(dev, a_req->out_mac_dma))) + goto free_a_ivin_dma; + } + if (req->use_pbuf) { + ret = sec_cipher_pbuf_map(ctx, req, src); + if (unlikely(ret)) + goto free_out_mac_dma; + + return 0; + } + + if (!c_req->encrypt && is_aead) { + ret = sec_aead_mac_init(a_req); + if (unlikely(ret)) { + dev_err(dev, "fail to init mac data for ICV!\n"); + goto free_out_mac_dma; + } + } + + ret = sec_cipher_map_sgl(dev, req, src, dst); + if (unlikely(ret)) { + dev_err(dev, "fail to dma map input sgl buffers!\n"); + goto free_out_mac_dma; + } + + return 0; + +free_out_mac_dma: + if (is_aead) + dma_unmap_single(dev, a_req->out_mac_dma, SEC_MAX_MAC_LEN, DMA_BIDIRECTIONAL); +free_a_ivin_dma: + if (is_aead) + dma_unmap_single(dev, a_req->a_ivin_dma, SEC_IV_SIZE, DMA_TO_DEVICE); +free_c_ivin_dma: + dma_unmap_single(dev, c_req->c_ivin_dma, SEC_IV_SIZE, DMA_TO_DEVICE); + return ret; +} + static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req, struct scatterlist *src, struct scatterlist *dst) { + struct sec_aead_req *a_req = &req->aead_req; struct sec_cipher_req *c_req = &req->c_req; struct device *dev = ctx->dev; + if (req->req_id >= 0) { + if (req->use_pbuf) { + sec_cipher_pbuf_unmap(ctx, req, dst); + } else { + if (dst != src) { + hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out, DMA_FROM_DEVICE); + hisi_acc_sg_buf_unmap(dev, src, req->in, DMA_TO_DEVICE); + } else { + hisi_acc_sg_buf_unmap(dev, src, req->in, DMA_BIDIRECTIONAL); + } + } + return; + } + if (req->use_pbuf) { sec_cipher_pbuf_unmap(ctx, req, dst); } else { if (dst != src) { - hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out, DMA_FROM_DEVICE); - hisi_acc_sg_buf_unmap(dev, src, req->in, DMA_TO_DEVICE); + sec_cipher_put_hw_sgl(dev, dst, req->buf.out_dma, DMA_FROM_DEVICE); + sec_cipher_put_hw_sgl(dev, src, req->buf.in_dma, DMA_TO_DEVICE); } else { - hisi_acc_sg_buf_unmap(dev, src, req->in, DMA_BIDIRECTIONAL); + sec_cipher_put_hw_sgl(dev, src, req->buf.in_dma, DMA_BIDIRECTIONAL); } } + + dma_unmap_single(dev, c_req->c_ivin_dma, SEC_IV_SIZE, DMA_TO_DEVICE); + if (ctx->alg_type == SEC_AEAD) { + dma_unmap_single(dev, a_req->a_ivin_dma, SEC_IV_SIZE, DMA_TO_DEVICE); + dma_unmap_single(dev, a_req->out_mac_dma, SEC_MAX_MAC_LEN, DMA_BIDIRECTIONAL); + } } static int sec_skcipher_sgl_map(struct sec_ctx *ctx, struct sec_req *req) @@ -1262,8 +1520,15 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma); sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); - sec_sqe->type2.data_src_addr = cpu_to_le64(req->in_dma); - sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); + if (req->req_id < 0) { + sec_sqe->type2.data_src_addr = cpu_to_le64(req->buf.in_dma); + sec_sqe->type2.data_dst_addr = cpu_to_le64(req->buf.out_dma); + } else { + sec_sqe->type2.data_src_addr = cpu_to_le64(req->in_dma); + sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma); + } + if (sec_sqe->type2.data_src_addr != sec_sqe->type2.data_dst_addr) + de = 0x1 << SEC_DE_OFFSET; sec_sqe->type2.icvw_kmode |= cpu_to_le16(((u16)c_ctx->c_mode) 
<< SEC_CMODE_OFFSET); @@ -1289,13 +1554,10 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req) sec_sqe->sdm_addr_type |= da_type; scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET; - if (req->in_dma != c_req->c_out_dma) - de = 0x1 << SEC_DE_OFFSET; sec_sqe->sds_sa_type = (de | scene | sa_type); sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len); - sec_sqe->type2.tag = cpu_to_le16((u16)req->req_id); return 0; } @@ -1312,8 +1574,15 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) sec_sqe3->c_key_addr = cpu_to_le64(c_ctx->c_key_dma); sec_sqe3->no_scene.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma); - sec_sqe3->data_src_addr = cpu_to_le64(req->in_dma); - sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma); + if (req->req_id < 0) { + sec_sqe3->data_src_addr = cpu_to_le64(req->buf.in_dma); + sec_sqe3->data_dst_addr = cpu_to_le64(req->buf.out_dma); + } else { + sec_sqe3->data_src_addr = cpu_to_le64(req->in_dma); + sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma); + } + if (sec_sqe3->data_src_addr != sec_sqe3->data_dst_addr) + bd_param |= 0x1 << SEC_DE_OFFSET_V3; sec_sqe3->c_mode_alg = ((u8)c_ctx->c_alg << SEC_CALG_OFFSET_V3) | c_ctx->c_mode; @@ -1339,8 +1608,6 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) } bd_param |= SEC_COMM_SCENE << SEC_SCENE_OFFSET_V3; - if (req->in_dma != c_req->c_out_dma) - bd_param |= 0x1 << SEC_DE_OFFSET_V3; bd_param |= SEC_BD_TYPE3; sec_sqe3->bd_param = cpu_to_le32(bd_param); @@ -1372,15 +1639,12 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type) size_t sz; u8 *iv; - if (req->c_req.encrypt) - sgl = alg_type == SEC_SKCIPHER ? sk_req->dst : aead_req->dst; - else - sgl = alg_type == SEC_SKCIPHER ? sk_req->src : aead_req->src; - if (alg_type == SEC_SKCIPHER) { + sgl = req->c_req.encrypt ? sk_req->dst : sk_req->src; iv = sk_req->iv; cryptlen = sk_req->cryptlen; } else { + sgl = req->c_req.encrypt ? 
aead_req->dst : aead_req->src; iv = aead_req->iv; cryptlen = aead_req->cryptlen; } @@ -1391,57 +1655,26 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type) if (unlikely(sz != iv_size)) dev_err(req->ctx->dev, "copy output iv error!\n"); } else { - sz = cryptlen / iv_size; - if (cryptlen % iv_size) - sz += 1; + sz = (cryptlen + iv_size - 1) / iv_size; ctr_iv_inc(iv, iv_size, sz); } } -static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, - struct sec_qp_ctx *qp_ctx) -{ - struct sec_req *backlog_req = NULL; - - spin_lock_bh(&qp_ctx->req_lock); - if (ctx->fake_req_limit >= - atomic_read(&qp_ctx->qp->qp_status.used) && - !list_empty(&qp_ctx->backlog)) { - backlog_req = list_first_entry(&qp_ctx->backlog, - typeof(*backlog_req), backlog_head); - list_del(&backlog_req->backlog_head); - } - spin_unlock_bh(&qp_ctx->req_lock); - - return backlog_req; -} - static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req, int err) { - struct skcipher_request *sk_req = req->c_req.sk_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; - struct skcipher_request *backlog_sk_req; - struct sec_req *backlog_req; - sec_free_req_id(req); + if (req->req_id >= 0) + sec_free_req_id(req); /* IV output at encrypto of CBC/CTR mode */ if (!err && (ctx->c_ctx.c_mode == SEC_CMODE_CBC || ctx->c_ctx.c_mode == SEC_CMODE_CTR) && req->c_req.encrypt) sec_update_iv(req, SEC_SKCIPHER); - while (1) { - backlog_req = sec_back_req_clear(ctx, qp_ctx); - if (!backlog_req) - break; - - backlog_sk_req = backlog_req->c_req.sk_req; - skcipher_request_complete(backlog_sk_req, -EINPROGRESS); - atomic64_inc(&ctx->sec->debug.dfx.recv_busy_cnt); - } - - skcipher_request_complete(sk_req, err); + crypto_request_complete(req->base, err); + sec_alg_send_backlog(ctx, qp_ctx); } static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req) @@ -1680,21 +1913,14 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) struct aead_request *a_req = req->aead_req.aead_req; struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); size_t authsize = crypto_aead_authsize(tfm); - struct sec_aead_req *aead_req = &req->aead_req; - struct sec_cipher_req *c_req = &req->c_req; struct sec_qp_ctx *qp_ctx = req->qp_ctx; - struct aead_request *backlog_aead_req; - struct sec_req *backlog_req; size_t sz; - if (!err && c->c_ctx.c_mode == SEC_CMODE_CBC && c_req->encrypt) - sec_update_iv(req, SEC_AEAD); + if (!err && req->c_req.encrypt) { + if (c->c_ctx.c_mode == SEC_CMODE_CBC) + sec_update_iv(req, SEC_AEAD); - /* Copy output mac */ - if (!err && c_req->encrypt) { - struct scatterlist *sgl = a_req->dst; - - sz = sg_pcopy_from_buffer(sgl, sg_nents(sgl), aead_req->out_mac, + sz = sg_pcopy_from_buffer(a_req->dst, sg_nents(a_req->dst), req->aead_req.out_mac, authsize, a_req->cryptlen + a_req->assoclen); if (unlikely(sz != authsize)) { dev_err(c->dev, "copy out mac err!\n"); @@ -1702,48 +1928,39 @@ static void sec_aead_callback(struct sec_ctx *c, struct sec_req *req, int err) } } - sec_free_req_id(req); + if (req->req_id >= 0) + sec_free_req_id(req); - while (1) { - backlog_req = sec_back_req_clear(c, qp_ctx); - if (!backlog_req) - break; - - backlog_aead_req = backlog_req->aead_req.aead_req; - aead_request_complete(backlog_aead_req, -EINPROGRESS); - atomic64_inc(&c->sec->debug.dfx.recv_busy_cnt); - } - - aead_request_complete(a_req, err); + crypto_request_complete(req->base, err); + sec_alg_send_backlog(c, qp_ctx); } -static void sec_request_uninit(struct sec_ctx *ctx, struct sec_req *req) +static void 
sec_request_uninit(struct sec_req *req) { - sec_free_req_id(req); - sec_free_queue_id(ctx, req); + if (req->req_id >= 0) + sec_free_req_id(req); } static int sec_request_init(struct sec_ctx *ctx, struct sec_req *req) { struct sec_qp_ctx *qp_ctx; - int queue_id; + int i; - /* To load balance */ - queue_id = sec_alloc_queue_id(ctx, req); - qp_ctx = &ctx->qp_ctx[queue_id]; - - req->req_id = sec_alloc_req_id(req, qp_ctx); - if (unlikely(req->req_id < 0)) { - sec_free_queue_id(ctx, req); - return req->req_id; + for (i = 0; i < ctx->sec->ctx_q_num; i++) { + qp_ctx = &ctx->qp_ctx[i]; + req->req_id = sec_alloc_req_id(req, qp_ctx); + if (req->req_id >= 0) + break; } + req->qp_ctx = qp_ctx; + req->backlog = &qp_ctx->backlog; + return 0; } static int sec_process(struct sec_ctx *ctx, struct sec_req *req) { - struct sec_cipher_req *c_req = &req->c_req; int ret; ret = sec_request_init(ctx, req); @@ -1760,8 +1977,7 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req) sec_update_iv(req, ctx->alg_type); ret = ctx->req_op->bd_send(ctx, req); - if (unlikely((ret != -EBUSY && ret != -EINPROGRESS) || - (ret == -EBUSY && !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)))) { + if (unlikely((ret != -EBUSY && ret != -EINPROGRESS))) { dev_err_ratelimited(ctx->dev, "send sec request failed!\n"); goto err_send_req; } @@ -1772,16 +1988,23 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req) /* As failing, restore the IV from user */ if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt) { if (ctx->alg_type == SEC_SKCIPHER) - memcpy(req->c_req.sk_req->iv, c_req->c_ivin, + memcpy(req->c_req.sk_req->iv, req->c_req.c_ivin, ctx->c_ctx.ivsize); else - memcpy(req->aead_req.aead_req->iv, c_req->c_ivin, + memcpy(req->aead_req.aead_req->iv, req->c_req.c_ivin, ctx->c_ctx.ivsize); } sec_request_untransfer(ctx, req); + err_uninit_req: - sec_request_uninit(ctx, req); + sec_request_uninit(req); + if (ctx->alg_type == SEC_AEAD) + ret = sec_aead_soft_crypto(ctx, req->aead_req.aead_req, + req->c_req.encrypt); + else + ret = sec_skcipher_soft_crypto(ctx, req->c_req.sk_req, + req->c_req.encrypt); return ret; } @@ -1855,7 +2078,7 @@ static int sec_aead_init(struct crypto_aead *tfm) struct sec_ctx *ctx = crypto_aead_ctx(tfm); int ret; - crypto_aead_set_reqsize(tfm, sizeof(struct sec_req)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct sec_req)); ctx->alg_type = SEC_AEAD; ctx->c_ctx.ivsize = crypto_aead_ivsize(tfm); if (ctx->c_ctx.ivsize < SEC_AIV_SIZE || @@ -2092,7 +2315,7 @@ static int sec_skcipher_soft_crypto(struct sec_ctx *ctx, static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(sk_req); - struct sec_req *req = skcipher_request_ctx(sk_req); + struct sec_req *req = skcipher_request_ctx_dma(sk_req); struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); bool need_fallback = false; int ret; @@ -2107,6 +2330,7 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt) req->c_req.sk_req = sk_req; req->c_req.encrypt = encrypt; req->ctx = ctx; + req->base = &sk_req->base; ret = sec_skcipher_param_check(ctx, req, &need_fallback); if (unlikely(ret)) @@ -2241,6 +2465,9 @@ static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq) return -EINVAL; if (unlikely(ctx->a_ctx.a_key_len & WORD_MASK)) return -EINVAL; + } else if (c_mode == SEC_CMODE_GCM) { + if (unlikely(sz < SEC_GCM_MIN_AUTH_SZ)) + return -EINVAL; } return 0; @@ -2314,7 +2541,7 @@ static int sec_aead_soft_crypto(struct sec_ctx *ctx, static int 
sec_aead_crypto(struct aead_request *a_req, bool encrypt) { struct crypto_aead *tfm = crypto_aead_reqtfm(a_req); - struct sec_req *req = aead_request_ctx(a_req); + struct sec_req *req = aead_request_ctx_dma(a_req); struct sec_ctx *ctx = crypto_aead_ctx(tfm); size_t sz = crypto_aead_authsize(tfm); bool need_fallback = false; @@ -2324,6 +2551,7 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt) req->aead_req.aead_req = a_req; req->c_req.encrypt = encrypt; req->ctx = ctx; + req->base = &a_req->base; req->c_req.c_len = a_req->cryptlen - (req->c_req.encrypt ? 0 : sz); ret = sec_aead_param_check(ctx, req, &need_fallback); From 63fa7c4dc0ea965d0a17fdbf51e5b421f61000fe Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:40 +0100 Subject: [PATCH 098/116] crypto: qat - validate service in rate limiting sysfs api The sysfs interface 'qat_rl/srv' currently allows all valid services, even if a service is not configured for the device. This leads to a failure when attempting to add the SLA using 'qat_rl/sla_op'. Add a check using is_service_enabled() to ensure the requested service is enabled. If not, return -EINVAL to prevent invalid configurations. Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_rl.c | 3 +-- drivers/crypto/intel/qat/qat_common/adf_rl.h | 1 + drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c | 8 ++++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index e782c23fc1bf..d320bfcb9919 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -209,8 +209,7 @@ u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, } } -static bool is_service_enabled(struct adf_accel_dev *accel_dev, - enum adf_base_services rl_srv) +bool is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services rl_srv) { enum adf_cfg_service_type arb_srv = srv_to_cfg_svc_type(rl_srv); struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index bfe750ea0e83..9b4678cee1fd 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -175,5 +175,6 @@ u32 adf_rl_calculate_ae_cycles(struct adf_accel_dev *accel_dev, u32 sla_val, enum adf_base_services svc_type); u32 adf_rl_calculate_slice_tokens(struct adf_accel_dev *accel_dev, u32 sla_val, enum adf_base_services svc_type); +bool is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services rl_srv); #endif /* ADF_RL_H_ */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c index bedb514d4e30..a8c3be24b3b4 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c @@ -291,14 +291,22 @@ static ssize_t srv_show(struct device *dev, struct device_attribute *attr, static ssize_t srv_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { + struct adf_accel_dev *accel_dev; unsigned int val; int ret; + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + ret = sysfs_match_string(rl_services, buf); if (ret < 0) return ret; val = ret; + if (!is_service_enabled(accel_dev, val)) 
+ return -EINVAL; + ret = set_param_u(dev, SRV, val); if (ret) return ret; From fa37d386c956a4f0112f3ade20b1d6948b6b5238 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:41 +0100 Subject: [PATCH 099/116] crypto: qat - add decompression service for rate limiting Add a new base service type ADF_SVC_DECOMP to the QAT rate limiting (RL) infrastructure. This enables RL support for the decompression (DECOMP) service type, allowing service-level agreements (SLAs) to be enforced when decompression is configured. The new service is exposed in the sysfs RL service list for visibility. Note that this support is applicable only to devices that provide the decompression service, such as QAT GEN6 devices. Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_rl.c | 2 ++ drivers/crypto/intel/qat/qat_common/adf_rl.h | 1 + drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c | 1 + 3 files changed, 4 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index d320bfcb9919..03c394d8c066 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -177,6 +177,8 @@ static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_s return SYM; case ADF_SVC_DC: return COMP; + case ADF_SVC_DECOMP: + return DECOMP; default: return UNUSED; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index 9b4678cee1fd..62cc47d1218a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -28,6 +28,7 @@ enum adf_base_services { ADF_SVC_ASYM = 0, ADF_SVC_SYM, ADF_SVC_DC, + ADF_SVC_DECOMP, ADF_SVC_NONE, }; diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c index a8c3be24b3b4..43df32df0dc5 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c @@ -35,6 +35,7 @@ static const char *const rl_services[] = { [ADF_SVC_ASYM] = "asym", [ADF_SVC_SYM] = "sym", [ADF_SVC_DC] = "dc", + [ADF_SVC_DECOMP] = "decomp", }; static const char *const rl_operations[] = { From d8d7e283e0d98d7a57346cc65feab59bf9638de9 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:42 +0100 Subject: [PATCH 100/116] crypto: qat - consolidate service enums The enums `adf_base_services` (used in rate limiting) and `adf_services` define the same values, resulting in code duplication. To improve consistency across the QAT driver: (1) rename `adf_services` to `adf_base_services` in adf_cfg_services.c to better reflect its role in defining core services (those with dedicated accelerators), (2) introduce a new `adf_extended_services` enum starting from `SVC_BASE_COUNT`, and move `SVC_DCC` into it, as it represents an extended service (DC with chaining), and (3) remove the redundant `adf_base_services` enum from the rate limiting implementation. This does not introduce any functional change. 
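For reference, a sketch of the resulting enum layout (it restates the adf_cfg_services.h hunk below, with editorial comments), showing how extended services stay numbered contiguously after the base ones:

enum adf_base_services {
	SVC_ASYM = 0,
	SVC_SYM,
	SVC_DC,
	SVC_DECOMP,
	SVC_BASE_COUNT		/* number of core services */
};

enum adf_extended_services {
	SVC_DCC = SVC_BASE_COUNT,	/* DC with chaining */
	SVC_COUNT
};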
Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_420xx/adf_420xx_hw_data.c | 6 ++-- .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 6 ++-- .../intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 6 ++-- .../intel/qat/qat_common/adf_cfg_services.c | 8 ++--- .../intel/qat/qat_common/adf_cfg_services.h | 10 ++++-- .../intel/qat/qat_common/adf_gen4_hw_data.c | 2 +- drivers/crypto/intel/qat/qat_common/adf_rl.c | 35 +++++++++---------- drivers/crypto/intel/qat/qat_common/adf_rl.h | 10 ++---- .../intel/qat/qat_common/adf_sysfs_rl.c | 14 ++++---- 9 files changed, 47 insertions(+), 50 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 8340b5e8a947..32bb9e1826d2 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -296,9 +296,9 @@ static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) rl_data->pcie_scale_div = ADF_420XX_RL_PCIE_SCALE_FACTOR_DIV; rl_data->pcie_scale_mul = ADF_420XX_RL_PCIE_SCALE_FACTOR_MUL; rl_data->dcpr_correction = ADF_420XX_RL_DCPR_CORRECTION; - rl_data->max_tp[ADF_SVC_ASYM] = ADF_420XX_RL_MAX_TP_ASYM; - rl_data->max_tp[ADF_SVC_SYM] = ADF_420XX_RL_MAX_TP_SYM; - rl_data->max_tp[ADF_SVC_DC] = ADF_420XX_RL_MAX_TP_DC; + rl_data->max_tp[SVC_ASYM] = ADF_420XX_RL_MAX_TP_ASYM; + rl_data->max_tp[SVC_SYM] = ADF_420XX_RL_MAX_TP_SYM; + rl_data->max_tp[SVC_DC] = ADF_420XX_RL_MAX_TP_DC; rl_data->scan_interval = ADF_420XX_RL_SCANS_PER_SEC; rl_data->scale_ref = ADF_420XX_RL_SLICE_REF; } diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 4d4889533558..f917cc9db09d 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -222,9 +222,9 @@ static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) rl_data->pcie_scale_div = ADF_4XXX_RL_PCIE_SCALE_FACTOR_DIV; rl_data->pcie_scale_mul = ADF_4XXX_RL_PCIE_SCALE_FACTOR_MUL; rl_data->dcpr_correction = ADF_4XXX_RL_DCPR_CORRECTION; - rl_data->max_tp[ADF_SVC_ASYM] = ADF_4XXX_RL_MAX_TP_ASYM; - rl_data->max_tp[ADF_SVC_SYM] = ADF_4XXX_RL_MAX_TP_SYM; - rl_data->max_tp[ADF_SVC_DC] = ADF_4XXX_RL_MAX_TP_DC; + rl_data->max_tp[SVC_ASYM] = ADF_4XXX_RL_MAX_TP_ASYM; + rl_data->max_tp[SVC_SYM] = ADF_4XXX_RL_MAX_TP_SYM; + rl_data->max_tp[SVC_DC] = ADF_4XXX_RL_MAX_TP_DC; rl_data->scan_interval = ADF_4XXX_RL_SCANS_PER_SEC; rl_data->scale_ref = ADF_4XXX_RL_SLICE_REF; } diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index d3f1034f33fb..28b7a7649bb6 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -103,7 +103,7 @@ static bool services_supported(unsigned long mask) { int num_svc; - if (mask >= BIT(SVC_BASE_COUNT)) + if (mask >= BIT(SVC_COUNT)) return false; num_svc = hweight_long(mask); @@ -138,7 +138,7 @@ static int get_service(unsigned long *mask) return -EINVAL; } -static enum adf_cfg_service_type get_ring_type(enum adf_services service) +static enum adf_cfg_service_type get_ring_type(unsigned int service) { switch (service) { case SVC_SYM: @@ -155,7 +155,7 @@ static enum adf_cfg_service_type get_ring_type(enum adf_services service) } } -static const unsigned long *get_thrd_mask(enum adf_services service) +static const unsigned long *get_thrd_mask(unsigned 
int service) { switch (service) { case SVC_SYM: diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c index f49227b10064..ab3cbce32dc4 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c @@ -20,9 +20,9 @@ static const char *const adf_cfg_services[] = { /* * Ensure that the size of the array matches the number of services, - * SVC_BASE_COUNT, that is used to size the bitmap. + * SVC_COUNT, that is used to size the bitmap. */ -static_assert(ARRAY_SIZE(adf_cfg_services) == SVC_BASE_COUNT); +static_assert(ARRAY_SIZE(adf_cfg_services) == SVC_COUNT); /* * Ensure that the maximum number of concurrent services that can be @@ -35,7 +35,7 @@ static_assert(ARRAY_SIZE(adf_cfg_services) >= MAX_NUM_CONCURR_SVC); * Ensure that the number of services fit a single unsigned long, as each * service is represented by a bit in the mask. */ -static_assert(BITS_PER_LONG >= SVC_BASE_COUNT); +static_assert(BITS_PER_LONG >= SVC_COUNT); /* * Ensure that size of the concatenation of all service strings is smaller @@ -90,7 +90,7 @@ static int adf_service_mask_to_string(unsigned long mask, char *buf, size_t len) if (len < ADF_CFG_MAX_VAL_LEN_IN_BYTES) return -ENOSPC; - for_each_set_bit(bit, &mask, SVC_BASE_COUNT) { + for_each_set_bit(bit, &mask, SVC_COUNT) { if (offset) offset += scnprintf(buf + offset, len - offset, ADF_SERVICES_DELIMITER); diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h index 8709b7a52907..b2dd62eabf26 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h @@ -7,17 +7,21 @@ struct adf_accel_dev; -enum adf_services { +enum adf_base_services { SVC_ASYM = 0, SVC_SYM, SVC_DC, SVC_DECOMP, - SVC_DCC, SVC_BASE_COUNT }; +enum adf_extended_services { + SVC_DCC = SVC_BASE_COUNT, + SVC_COUNT +}; + enum adf_composed_services { - SVC_SYM_ASYM = SVC_BASE_COUNT, + SVC_SYM_ASYM = SVC_COUNT, SVC_SYM_DC, SVC_ASYM_DC, }; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 0dbf9cc2a858..3103755e416e 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -262,7 +262,7 @@ bool adf_gen4_services_supported(unsigned long mask) { unsigned long num_svc = hweight_long(mask); - if (mask >= BIT(SVC_BASE_COUNT)) + if (mask >= BIT(SVC_COUNT)) return false; if (test_bit(SVC_DECOMP, &mask)) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index 03c394d8c066..0d5f59bfa6a2 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -13,6 +13,7 @@ #include #include "adf_accel_devices.h" +#include "adf_cfg_services.h" #include "adf_common_drv.h" #include "adf_rl_admin.h" #include "adf_rl.h" @@ -55,7 +56,7 @@ static int validate_user_input(struct adf_accel_dev *accel_dev, } } - if (sla_in->srv >= ADF_SVC_NONE) { + if (sla_in->srv >= SVC_BASE_COUNT) { dev_notice(&GET_DEV(accel_dev), "Wrong service type\n"); return -EINVAL; @@ -171,13 +172,13 @@ static struct rl_sla *find_parent(struct adf_rl *rl_data, static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_srv) { switch (rl_srv) { - case ADF_SVC_ASYM: + case SVC_ASYM: return ASYM; - case ADF_SVC_SYM: + case 
SVC_SYM: return SYM; - case ADF_SVC_DC: + case SVC_DC: return COMP; - case ADF_SVC_DECOMP: + case SVC_DECOMP: return DECOMP; default: return UNUSED; @@ -562,13 +563,13 @@ u32 adf_rl_calculate_slice_tokens(struct adf_accel_dev *accel_dev, u32 sla_val, avail_slice_cycles = hw_data->clock_frequency; switch (svc_type) { - case ADF_SVC_ASYM: + case SVC_ASYM: avail_slice_cycles *= device_data->slices.pke_cnt; break; - case ADF_SVC_SYM: + case SVC_SYM: avail_slice_cycles *= device_data->slices.cph_cnt; break; - case ADF_SVC_DC: + case SVC_DC: avail_slice_cycles *= device_data->slices.dcpr_cnt; break; default: @@ -618,9 +619,8 @@ u32 adf_rl_calculate_pci_bw(struct adf_accel_dev *accel_dev, u32 sla_val, sla_to_bytes *= device_data->max_tp[svc_type]; do_div(sla_to_bytes, device_data->scale_ref); - sla_to_bytes *= (svc_type == ADF_SVC_ASYM) ? RL_TOKEN_ASYM_SIZE : - BYTES_PER_MBIT; - if (svc_type == ADF_SVC_DC && is_bw_out) + sla_to_bytes *= (svc_type == SVC_ASYM) ? RL_TOKEN_ASYM_SIZE : BYTES_PER_MBIT; + if (svc_type == SVC_DC && is_bw_out) sla_to_bytes *= device_data->slices.dcpr_cnt - device_data->dcpr_correction; @@ -731,7 +731,7 @@ static int initialize_default_nodes(struct adf_accel_dev *accel_dev) sla_in.type = RL_ROOT; sla_in.parent_id = RL_PARENT_DEFAULT_ID; - for (i = 0; i < ADF_SVC_NONE; i++) { + for (i = 0; i < SVC_BASE_COUNT; i++) { if (!is_service_enabled(accel_dev, i)) continue; @@ -746,10 +746,9 @@ static int initialize_default_nodes(struct adf_accel_dev *accel_dev) /* Init default cluster for each root */ sla_in.type = RL_CLUSTER; - for (i = 0; i < ADF_SVC_NONE; i++) { + for (i = 0; i < SVC_BASE_COUNT; i++) { if (!rl_data->root[i]) continue; - sla_in.cir = rl_data->root[i]->cir; sla_in.pir = sla_in.cir; sla_in.srv = rl_data->root[i]->srv; @@ -988,7 +987,7 @@ int adf_rl_get_capability_remaining(struct adf_accel_dev *accel_dev, struct rl_sla *sla = NULL; int i; - if (srv >= ADF_SVC_NONE) + if (srv >= SVC_BASE_COUNT) return -EINVAL; if (sla_id > RL_SLA_EMPTY_ID && !validate_sla_id(accel_dev, sla_id)) { @@ -1087,9 +1086,9 @@ int adf_rl_init(struct adf_accel_dev *accel_dev) int ret = 0; /* Validate device parameters */ - if (RL_VALIDATE_NON_ZERO(rl_hw_data->max_tp[ADF_SVC_ASYM]) || - RL_VALIDATE_NON_ZERO(rl_hw_data->max_tp[ADF_SVC_SYM]) || - RL_VALIDATE_NON_ZERO(rl_hw_data->max_tp[ADF_SVC_DC]) || + if (RL_VALIDATE_NON_ZERO(rl_hw_data->max_tp[SVC_ASYM]) || + RL_VALIDATE_NON_ZERO(rl_hw_data->max_tp[SVC_SYM]) || + RL_VALIDATE_NON_ZERO(rl_hw_data->max_tp[SVC_DC]) || RL_VALIDATE_NON_ZERO(rl_hw_data->scan_interval) || RL_VALIDATE_NON_ZERO(rl_hw_data->pcie_scale_div) || RL_VALIDATE_NON_ZERO(rl_hw_data->pcie_scale_mul) || diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index 62cc47d1218a..f2393bdb8ccc 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -7,6 +7,8 @@ #include #include +#include "adf_cfg_services.h" + struct adf_accel_dev; #define RL_ROOT_MAX 4 @@ -24,14 +26,6 @@ enum rl_node_type { RL_LEAF, }; -enum adf_base_services { - ADF_SVC_ASYM = 0, - ADF_SVC_SYM, - ADF_SVC_DC, - ADF_SVC_DECOMP, - ADF_SVC_NONE, -}; - /** * struct adf_rl_sla_input_data - ratelimiting user input data structure * @rp_mask: 64 bit bitmask of ring pair IDs which will be assigned to SLA. 
diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c index 43df32df0dc5..9d439df6d9ad 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c @@ -32,10 +32,10 @@ enum rl_params { }; static const char *const rl_services[] = { - [ADF_SVC_ASYM] = "asym", - [ADF_SVC_SYM] = "sym", - [ADF_SVC_DC] = "dc", - [ADF_SVC_DECOMP] = "decomp", + [SVC_ASYM] = "asym", + [SVC_SYM] = "sym", + [SVC_DC] = "dc", + [SVC_DECOMP] = "decomp", }; static const char *const rl_operations[] = { @@ -283,7 +283,7 @@ static ssize_t srv_show(struct device *dev, struct device_attribute *attr, if (ret) return ret; - if (get == ADF_SVC_NONE) + if (get == SVC_BASE_COUNT) return -EINVAL; return sysfs_emit(buf, "%s\n", rl_services[get]); @@ -448,8 +448,8 @@ int adf_sysfs_rl_add(struct adf_accel_dev *accel_dev) dev_err(&GET_DEV(accel_dev), "Failed to create qat_rl attribute group\n"); - data->cap_rem_srv = ADF_SVC_NONE; - data->input.srv = ADF_SVC_NONE; + data->cap_rem_srv = SVC_BASE_COUNT; + data->input.srv = SVC_BASE_COUNT; data->sysfs_added = true; return ret; From fdf31c75096051cee191ce1baf3f4c41b860e1c6 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:43 +0100 Subject: [PATCH 101/116] crypto: qat - relocate service related functions Rename (1) is_service_enabled() to adf_is_service_enabled(), and (2) srv_to_cfg_svc_type() to adf_srv_to_cfg_svc_type(), and move them to adf_cfg_services.c which is the appropriate place for configuration-related service logic. This improves code organization and modularity by grouping related service configuration logic in a single location. Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_common/adf_cfg_services.c | 32 ++++++++++++++++ .../intel/qat/qat_common/adf_cfg_services.h | 2 + drivers/crypto/intel/qat/qat_common/adf_rl.c | 37 ++----------------- drivers/crypto/intel/qat/qat_common/adf_rl.h | 1 - .../intel/qat/qat_common/adf_sysfs_rl.c | 2 +- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c index ab3cbce32dc4..7d00bcb41ce7 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.c @@ -7,6 +7,7 @@ #include #include #include "adf_cfg.h" +#include "adf_cfg_common.h" #include "adf_cfg_services.h" #include "adf_cfg_strings.h" @@ -178,3 +179,34 @@ int adf_get_service_enabled(struct adf_accel_dev *accel_dev) return -EINVAL; } EXPORT_SYMBOL_GPL(adf_get_service_enabled); + +enum adf_cfg_service_type adf_srv_to_cfg_svc_type(enum adf_base_services svc) +{ + switch (svc) { + case SVC_ASYM: + return ASYM; + case SVC_SYM: + return SYM; + case SVC_DC: + return COMP; + case SVC_DECOMP: + return DECOMP; + default: + return UNUSED; + } +} + +bool adf_is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services svc) +{ + enum adf_cfg_service_type arb_srv = adf_srv_to_cfg_svc_type(svc); + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + u8 rps_per_bundle = hw_data->num_banks_per_vf; + int i; + + for (i = 0; i < rps_per_bundle; i++) { + if (GET_SRV_TYPE(accel_dev, i) == arb_srv) + return true; + } + + return false; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h index 
b2dd62eabf26..913d717280af 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_services.h @@ -38,5 +38,7 @@ int adf_parse_service_string(struct adf_accel_dev *accel_dev, const char *in, size_t in_len, char *out, size_t out_len); int adf_get_service_enabled(struct adf_accel_dev *accel_dev); int adf_get_service_mask(struct adf_accel_dev *accel_dev, unsigned long *mask); +enum adf_cfg_service_type adf_srv_to_cfg_svc_type(enum adf_base_services svc); +bool adf_is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services svc); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index 0d5f59bfa6a2..926975539740 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -169,22 +169,6 @@ static struct rl_sla *find_parent(struct adf_rl *rl_data, return NULL; } -static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_srv) -{ - switch (rl_srv) { - case SVC_ASYM: - return ASYM; - case SVC_SYM: - return SYM; - case SVC_DC: - return COMP; - case SVC_DECOMP: - return DECOMP; - default: - return UNUSED; - } -} - /** * adf_rl_get_sla_arr_of_type() - Returns a pointer to SLA type specific array * @rl_data: pointer to ratelimiting data @@ -212,21 +196,6 @@ u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, } } -bool is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services rl_srv) -{ - enum adf_cfg_service_type arb_srv = srv_to_cfg_svc_type(rl_srv); - struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); - u8 rps_per_bundle = hw_data->num_banks_per_vf; - int i; - - for (i = 0; i < rps_per_bundle; i++) { - if (GET_SRV_TYPE(accel_dev, i) == arb_srv) - return true; - } - - return false; -} - /** * prepare_rp_ids() - Creates an array of ring pair IDs from bitmask * @accel_dev: pointer to acceleration device structure @@ -245,7 +214,7 @@ bool is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services static int prepare_rp_ids(struct adf_accel_dev *accel_dev, struct rl_sla *sla, const unsigned long rp_mask) { - enum adf_cfg_service_type arb_srv = srv_to_cfg_svc_type(sla->srv); + enum adf_cfg_service_type arb_srv = adf_srv_to_cfg_svc_type(sla->srv); u16 rps_per_bundle = GET_HW_DATA(accel_dev)->num_banks_per_vf; bool *rp_in_use = accel_dev->rate_limiting->rp_in_use; size_t rp_cnt_max = ARRAY_SIZE(sla->ring_pairs_ids); @@ -661,7 +630,7 @@ static int add_new_sla_entry(struct adf_accel_dev *accel_dev, } *sla_out = sla; - if (!is_service_enabled(accel_dev, sla_in->srv)) { + if (!adf_is_service_enabled(accel_dev, sla_in->srv)) { dev_notice(&GET_DEV(accel_dev), "Provided service is not enabled\n"); ret = -EINVAL; @@ -732,7 +701,7 @@ static int initialize_default_nodes(struct adf_accel_dev *accel_dev) sla_in.parent_id = RL_PARENT_DEFAULT_ID; for (i = 0; i < SVC_BASE_COUNT; i++) { - if (!is_service_enabled(accel_dev, i)) + if (!adf_is_service_enabled(accel_dev, i)) continue; sla_in.cir = device_data->scale_ref; diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index f2393bdb8ccc..dee7f0c81906 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -170,6 +170,5 @@ u32 adf_rl_calculate_ae_cycles(struct adf_accel_dev *accel_dev, u32 sla_val, enum adf_base_services svc_type); u32 adf_rl_calculate_slice_tokens(struct adf_accel_dev 
*accel_dev, u32 sla_val, enum adf_base_services svc_type); -bool is_service_enabled(struct adf_accel_dev *accel_dev, enum adf_base_services rl_srv); #endif /* ADF_RL_H_ */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c index 9d439df6d9ad..f31556beed8b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs_rl.c @@ -305,7 +305,7 @@ static ssize_t srv_store(struct device *dev, struct device_attribute *attr, return ret; val = ret; - if (!is_service_enabled(accel_dev, val)) + if (!adf_is_service_enabled(accel_dev, val)) return -EINVAL; ret = set_param_u(dev, SRV, val); From a95521531619945cba526da0a8241e43a18bade3 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:44 +0100 Subject: [PATCH 102/116] crypto: qat - add adf_rl_get_num_svc_aes() in rate limiting Enhance the rate limiting (RL) infrastructure by adding adf_rl_get_num_svc_aes() which can be used to fetch the number of engines associated with the service type. Expand the structure adf_rl_hw_data with an array that contains the number of AEs per service. Implement adf_gen4_init_num_svc_aes() for QAT GEN4 devices to calculate the total number of acceleration engines dedicated to a specific service. Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_420xx/adf_420xx_hw_data.c | 2 ++ .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 2 ++ .../intel/qat/qat_common/adf_gen4_hw_data.c | 22 +++++++++++++++++++ .../intel/qat/qat_common/adf_gen4_hw_data.h | 1 + drivers/crypto/intel/qat/qat_common/adf_rl.c | 13 ++++++++++- drivers/crypto/intel/qat/qat_common/adf_rl.h | 1 + 6 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 32bb9e1826d2..67a1c1d8e23e 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -301,6 +301,8 @@ static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) rl_data->max_tp[SVC_DC] = ADF_420XX_RL_MAX_TP_DC; rl_data->scan_interval = ADF_420XX_RL_SCANS_PER_SEC; rl_data->scale_ref = ADF_420XX_RL_SLICE_REF; + + adf_gen4_init_num_svc_aes(rl_data); } static int get_rp_group(struct adf_accel_dev *accel_dev, u32 ae_mask) diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index f917cc9db09d..9b728dba048b 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -227,6 +227,8 @@ static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) rl_data->max_tp[SVC_DC] = ADF_4XXX_RL_MAX_TP_DC; rl_data->scan_interval = ADF_4XXX_RL_SCANS_PER_SEC; rl_data->scale_ref = ADF_4XXX_RL_SLICE_REF; + + adf_gen4_init_num_svc_aes(rl_data); } static u32 uof_get_num_objs(struct adf_accel_dev *accel_dev) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 3103755e416e..5e4b45c3fabe 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -558,3 +558,25 @@ void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops) dc_ops->build_decomp_block = adf_gen4_build_decomp_block; } EXPORT_SYMBOL_GPL(adf_gen4_init_dc_ops); + +void adf_gen4_init_num_svc_aes(struct 
adf_rl_hw_data *device_data) +{ + struct adf_hw_device_data *hw_data; + unsigned int i; + u32 ae_cnt; + + hw_data = container_of(device_data, struct adf_hw_device_data, rl_data); + ae_cnt = hweight32(hw_data->get_ae_mask(hw_data)); + if (!ae_cnt) + return; + + for (i = 0; i < SVC_BASE_COUNT; i++) + device_data->svc_ae_mask[i] = ae_cnt - 1; + + /* + * The decompression service is not supported on QAT GEN4 devices. + * Therefore, set svc_ae_mask to 0. + */ + device_data->svc_ae_mask[SVC_DECOMP] = 0; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_num_svc_aes); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index 7f2b9cb0fe60..7fa203071c01 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -175,5 +175,6 @@ void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, u32 bank_number); bool adf_gen4_services_supported(unsigned long service_mask); void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops); +void adf_gen4_init_num_svc_aes(struct adf_rl_hw_data *device_data); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index 926975539740..77465ab6702c 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -552,6 +552,17 @@ u32 adf_rl_calculate_slice_tokens(struct adf_accel_dev *accel_dev, u32 sla_val, return allocated_tokens; } +static u32 adf_rl_get_num_svc_aes(struct adf_accel_dev *accel_dev, + enum adf_base_services svc) +{ + struct adf_rl_hw_data *device_data = &accel_dev->hw_device->rl_data; + + if (svc >= SVC_BASE_COUNT) + return 0; + + return device_data->svc_ae_mask[svc]; +} + u32 adf_rl_calculate_ae_cycles(struct adf_accel_dev *accel_dev, u32 sla_val, enum adf_base_services svc_type) { @@ -563,7 +574,7 @@ u32 adf_rl_calculate_ae_cycles(struct adf_accel_dev *accel_dev, u32 sla_val, return 0; avail_ae_cycles = hw_data->clock_frequency; - avail_ae_cycles *= hw_data->get_num_aes(hw_data) - 1; + avail_ae_cycles *= adf_rl_get_num_svc_aes(accel_dev, svc_type); do_div(avail_ae_cycles, device_data->scan_interval); sla_val *= device_data->max_tp[svc_type]; diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index dee7f0c81906..59f885916157 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -89,6 +89,7 @@ struct adf_rl_hw_data { u32 pcie_scale_div; u32 dcpr_correction; u32 max_tp[RL_ROOT_MAX]; + u32 svc_ae_mask[SVC_BASE_COUNT]; struct rl_slice_cnt slices; }; From e983946de17281d473a4873f52806f0183bb5ab1 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:45 +0100 Subject: [PATCH 103/116] crypto: qat - add get_svc_slice_cnt() in device data structure Enhance the adf_hw_device_data structure by introducing a new callback function get_svc_slice_cnt(), which provides a mechanism to query the total number of accelerators available on the device for a specific service. Implement adf_gen4_get_svc_slice_cnt() for QAT GEN4 devices to support this new interface. This function returns the total accelerator count for a specific service.
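As an illustration only (not part of the patch), a minimal sketch of how a caller is expected to use the new hook; it assumes accel_dev is a valid, initialized acceleration device handle:

	struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
	u32 sym_slices = hw_data->get_svc_slice_cnt(accel_dev, SVC_SYM);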
Co-developed-by: George Abraham P Signed-off-by: George Abraham P Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- .../intel/qat/qat_420xx/adf_420xx_hw_data.c | 1 + .../intel/qat/qat_4xxx/adf_4xxx_hw_data.c | 1 + .../intel/qat/qat_common/adf_accel_devices.h | 2 ++ .../intel/qat/qat_common/adf_gen4_hw_data.c | 18 ++++++++++++++++++ .../intel/qat/qat_common/adf_gen4_hw_data.h | 2 ++ drivers/crypto/intel/qat/qat_common/adf_rl.c | 16 ++-------------- 6 files changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 67a1c1d8e23e..53fa91d577ed 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -468,6 +468,7 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE; hw_data->clock_frequency = ADF_420XX_AE_FREQ; hw_data->services_supported = adf_gen4_services_supported; + hw_data->get_svc_slice_cnt = adf_gen4_get_svc_slice_cnt; adf_gen4_set_err_mask(&hw_data->dev_err_mask); adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 9b728dba048b..740f68a36ac5 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -462,6 +462,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE; hw_data->clock_frequency = ADF_4XXX_AE_FREQ; hw_data->services_supported = adf_gen4_services_supported; + hw_data->get_svc_slice_cnt = adf_gen4_get_svc_slice_cnt; adf_gen4_set_err_mask(&hw_data->dev_err_mask); adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index f76e0f6c66ae..9fe3239f0114 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -319,6 +319,8 @@ struct adf_hw_device_data { u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); int (*dev_config)(struct adf_accel_dev *accel_dev); bool (*services_supported)(unsigned long mask); + u32 (*get_svc_slice_cnt)(struct adf_accel_dev *accel_dev, + enum adf_base_services svc); struct adf_pfvf_ops pfvf_ops; struct adf_hw_csr_ops csr_ops; struct adf_dc_ops dc_ops; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 5e4b45c3fabe..349fdb323763 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -580,3 +580,21 @@ void adf_gen4_init_num_svc_aes(struct adf_rl_hw_data *device_data) device_data->svc_ae_mask[SVC_DECOMP] = 0; } EXPORT_SYMBOL_GPL(adf_gen4_init_num_svc_aes); + +u32 adf_gen4_get_svc_slice_cnt(struct adf_accel_dev *accel_dev, + enum adf_base_services svc) +{ + struct adf_rl_hw_data *device_data = &accel_dev->hw_device->rl_data; + + switch (svc) { + case SVC_SYM: + return device_data->slices.cph_cnt; + case SVC_ASYM: + return device_data->slices.pke_cnt; + case SVC_DC: + return device_data->slices.dcpr_cnt; + default: + return 0; + } +} +EXPORT_SYMBOL_GPL(adf_gen4_get_svc_slice_cnt); diff --git 
a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index 7fa203071c01..cd26b6724c43 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -176,5 +176,7 @@ void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, bool adf_gen4_services_supported(unsigned long service_mask); void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops); void adf_gen4_init_num_svc_aes(struct adf_rl_hw_data *device_data); +u32 adf_gen4_get_svc_slice_cnt(struct adf_accel_dev *accel_dev, + enum adf_base_services svc); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index 77465ab6702c..c6a54e465931 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -529,21 +529,9 @@ u32 adf_rl_calculate_slice_tokens(struct adf_accel_dev *accel_dev, u32 sla_val, if (!sla_val) return 0; + /* Handle generation specific slice count adjustment */ avail_slice_cycles = hw_data->clock_frequency; - - switch (svc_type) { - case SVC_ASYM: - avail_slice_cycles *= device_data->slices.pke_cnt; - break; - case SVC_SYM: - avail_slice_cycles *= device_data->slices.cph_cnt; - break; - case SVC_DC: - avail_slice_cycles *= device_data->slices.dcpr_cnt; - break; - default: - break; - } + avail_slice_cycles *= hw_data->get_svc_slice_cnt(accel_dev, svc_type); do_div(avail_slice_cycles, device_data->scan_interval); allocated_tokens = avail_slice_cycles * sla_val; From 45515eec6662dee31a66bd06110ca0b8ded38574 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:46 +0100 Subject: [PATCH 104/116] crypto: qat - add compression slice count for rate limiting In QAT GEN4 devices, the compression slice count was tracked using the dcpr_cnt field. Introduce a new cpr_cnt field in the rate limiting (RL) infrastructure to track the compression (CPR) slice count independently. The cpr_cnt value is populated via the RL_INIT admin message. The existing dcpr_cnt field will now be used exclusively to cache the decompression slice count, ensuring a clear separation between compression and decompression tracking. 
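The resulting counters, with editorial comments summarizing the split (the field names match the hunk below; the comments are not from the driver):

struct rl_slice_cnt {
	u8 dcpr_cnt;	/* decompression slices only, after this change */
	u8 pke_cnt;	/* asymmetric (public key) slices */
	u8 cph_cnt;	/* symmetric cipher slices, relative to the UCS slice */
	u8 cpr_cnt;	/* compression slices, from the RL_INIT admin response */
};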
Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_rl.h | 1 + drivers/crypto/intel/qat/qat_common/adf_rl_admin.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index 59f885916157..c1f3f9a51195 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -68,6 +68,7 @@ struct rl_slice_cnt { u8 dcpr_cnt; u8 pke_cnt; u8 cph_cnt; + u8 cpr_cnt; }; struct adf_rl_interface_data { diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl_admin.c b/drivers/crypto/intel/qat/qat_common/adf_rl_admin.c index 698a14f4ce66..4a3e0591fdba 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl_admin.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl_admin.c @@ -63,6 +63,7 @@ int adf_rl_send_admin_init_msg(struct adf_accel_dev *accel_dev, slices_int->pke_cnt = slices_resp.pke_cnt; /* For symmetric crypto, slice tokens are relative to the UCS slice */ slices_int->cph_cnt = slices_resp.ucs_cnt; + slices_int->cpr_cnt = slices_resp.cpr_cnt; return 0; } From 3471c899fd6be69383aa2b52c411a67c6200a762 Mon Sep 17 00:00:00 2001 From: Suman Kumar Chakraborty Date: Thu, 10 Jul 2025 14:33:47 +0100 Subject: [PATCH 105/116] crypto: qat - enable rate limiting feature for GEN6 devices Add support for enabling the rate limiting (RL) feature for QAT GEN6 by initializing the rl_data member in the adf_hw_device_data structure. Implement init_num_svc_aes() for GEN6 to populate the number of AEs associated with each RL service type. Implement adf_gen6_get_svc_slice_cnt() for GEN6 to return the slice count that supports a given RL service type. Co-developed-by: George Abraham P Signed-off-by: George Abraham P Signed-off-by: Suman Kumar Chakraborty Reviewed-by: Giovanni Cabiddu Signed-off-by: Herbert Xu --- Documentation/ABI/testing/sysfs-driver-qat_rl | 14 ++-- .../intel/qat/qat_6xxx/adf_6xxx_hw_data.c | 71 +++++++++++++++++++ .../intel/qat/qat_6xxx/adf_6xxx_hw_data.h | 20 ++++++ 3 files changed, 98 insertions(+), 7 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-qat_rl b/Documentation/ABI/testing/sysfs-driver-qat_rl index 8c282ae3155d..d534f89b4971 100644 --- a/Documentation/ABI/testing/sysfs-driver-qat_rl +++ b/Documentation/ABI/testing/sysfs-driver-qat_rl @@ -31,7 +31,7 @@ Description: * rm_all: Removes all the configured SLAs. * Inputs: None - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat_rl/rp Date: January 2024 @@ -68,7 +68,7 @@ Description: ## Write # echo 0x5 > /sys/bus/pci/devices//qat_rl/rp - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat_rl/id Date: January 2024 @@ -101,7 +101,7 @@ Description: # cat /sys/bus/pci/devices//qat_rl/rp 0x5 ## ring pair ID 0 and ring pair ID 2 - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat_rl/cir Date: January 2024 @@ -135,7 +135,7 @@ Description: # cat /sys/bus/pci/devices//qat_rl/cir 500 - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices.
What: /sys/bus/pci/devices//qat_rl/pir Date: January 2024 @@ -169,7 +169,7 @@ Description: # cat /sys/bus/pci/devices//qat_rl/pir 750 - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat_rl/srv Date: January 2024 @@ -202,7 +202,7 @@ Description: # cat /sys/bus/pci/devices//qat_rl/srv dc - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. What: /sys/bus/pci/devices//qat_rl/cap_rem Date: January 2024 @@ -223,4 +223,4 @@ Description: # cat /sys/bus/pci/devices//qat_rl/cap_rem 0 - This attribute is only available for qat_4xxx devices. + This attribute is only available for qat_4xxx and qat_6xxx devices. diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c index 28b7a7649bb6..bed88d3ce8ca 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.c @@ -524,6 +524,55 @@ static int adf_gen6_init_thd2arb_map(struct adf_accel_dev *accel_dev) return 0; } +static void init_num_svc_aes(struct adf_rl_hw_data *device_data) +{ + enum adf_fw_objs obj_type, obj_iter; + unsigned int svc, i, num_grp; + u32 ae_mask; + + for (svc = 0; svc < SVC_BASE_COUNT; svc++) { + switch (svc) { + case SVC_SYM: + case SVC_ASYM: + obj_type = ADF_FW_CY_OBJ; + break; + case SVC_DC: + case SVC_DECOMP: + obj_type = ADF_FW_DC_OBJ; + break; + } + + num_grp = ARRAY_SIZE(adf_default_fw_config); + for (i = 0; i < num_grp; i++) { + obj_iter = adf_default_fw_config[i].obj; + if (obj_iter == obj_type) { + ae_mask = adf_default_fw_config[i].ae_mask; + device_data->svc_ae_mask[svc] = hweight32(ae_mask); + break; + } + } + } +} + +static u32 adf_gen6_get_svc_slice_cnt(struct adf_accel_dev *accel_dev, + enum adf_base_services svc) +{ + struct adf_rl_hw_data *device_data = &accel_dev->hw_device->rl_data; + + switch (svc) { + case SVC_SYM: + return device_data->slices.cph_cnt; + case SVC_ASYM: + return device_data->slices.pke_cnt; + case SVC_DC: + return device_data->slices.cpr_cnt + device_data->slices.dcpr_cnt; + case SVC_DECOMP: + return device_data->slices.dcpr_cnt; + default: + return 0; + } +} + static void set_vc_csr_for_bank(void __iomem *csr, u32 bank_number) { u32 value; @@ -805,6 +854,25 @@ static int dev_config(struct adf_accel_dev *accel_dev) return ret; } +static void adf_gen6_init_rl_data(struct adf_rl_hw_data *rl_data) +{ + rl_data->pciout_tb_offset = ADF_GEN6_RL_TOKEN_PCIEOUT_BUCKET_OFFSET; + rl_data->pciin_tb_offset = ADF_GEN6_RL_TOKEN_PCIEIN_BUCKET_OFFSET; + rl_data->r2l_offset = ADF_GEN6_RL_R2L_OFFSET; + rl_data->l2c_offset = ADF_GEN6_RL_L2C_OFFSET; + rl_data->c2s_offset = ADF_GEN6_RL_C2S_OFFSET; + rl_data->pcie_scale_div = ADF_6XXX_RL_PCIE_SCALE_FACTOR_DIV; + rl_data->pcie_scale_mul = ADF_6XXX_RL_PCIE_SCALE_FACTOR_MUL; + rl_data->max_tp[SVC_ASYM] = ADF_6XXX_RL_MAX_TP_ASYM; + rl_data->max_tp[SVC_SYM] = ADF_6XXX_RL_MAX_TP_SYM; + rl_data->max_tp[SVC_DC] = ADF_6XXX_RL_MAX_TP_DC; + rl_data->max_tp[SVC_DECOMP] = ADF_6XXX_RL_MAX_TP_DECOMP; + rl_data->scan_interval = ADF_6XXX_RL_SCANS_PER_SEC; + rl_data->scale_ref = ADF_6XXX_RL_SLICE_REF; + + init_num_svc_aes(rl_data); +} + void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data) { hw_data->dev_class = &adf_6xxx_class; @@ -863,6 +931,8 @@ void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data) hw_data->enable_pm = enable_pm; hw_data->services_supported = 
services_supported; hw_data->num_rps = ADF_GEN6_ETR_MAX_BANKS; + hw_data->clock_frequency = ADF_6XXX_AE_FREQ; + hw_data->get_svc_slice_cnt = adf_gen6_get_svc_slice_cnt; adf_gen6_init_hw_csr_ops(&hw_data->csr_ops); adf_gen6_init_pf_pfvf_ops(&hw_data->pfvf_ops); @@ -870,6 +940,7 @@ void adf_init_hw_data_6xxx(struct adf_hw_device_data *hw_data) adf_gen6_init_vf_mig_ops(&hw_data->vfmig_ops); adf_gen6_init_ras_ops(&hw_data->ras_ops); adf_gen6_init_tl_data(&hw_data->tl_data); + adf_gen6_init_rl_data(&hw_data->rl_data); } void adf_clean_hw_data_6xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h index 8824958527c4..d822911fe68c 100644 --- a/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h +++ b/drivers/crypto/intel/qat/qat_6xxx/adf_6xxx_hw_data.h @@ -122,6 +122,13 @@ /* Number of heartbeat counter pairs */ #define ADF_NUM_HB_CNT_PER_AE ADF_NUM_THREADS_PER_AE +/* Rate Limiting */ +#define ADF_GEN6_RL_R2L_OFFSET 0x508000 +#define ADF_GEN6_RL_L2C_OFFSET 0x509000 +#define ADF_GEN6_RL_C2S_OFFSET 0x508818 +#define ADF_GEN6_RL_TOKEN_PCIEIN_BUCKET_OFFSET 0x508800 +#define ADF_GEN6_RL_TOKEN_PCIEOUT_BUCKET_OFFSET 0x508804 + /* Physical function fuses */ #define ADF_6XXX_ACCELENGINES_MASK GENMASK(8, 0) #define ADF_6XXX_ADMIN_AE_MASK GENMASK(8, 8) @@ -133,6 +140,19 @@ #define ADF_6XXX_DC_OBJ "qat_6xxx_dc.bin" #define ADF_6XXX_ADMIN_OBJ "qat_6xxx_admin.bin" +/* RL constants */ +#define ADF_6XXX_RL_PCIE_SCALE_FACTOR_DIV 100 +#define ADF_6XXX_RL_PCIE_SCALE_FACTOR_MUL 102 +#define ADF_6XXX_RL_SCANS_PER_SEC 954 +#define ADF_6XXX_RL_MAX_TP_ASYM 173750UL +#define ADF_6XXX_RL_MAX_TP_SYM 95000UL +#define ADF_6XXX_RL_MAX_TP_DC 40000UL +#define ADF_6XXX_RL_MAX_TP_DECOMP 40000UL +#define ADF_6XXX_RL_SLICE_REF 1000UL + +/* Clock frequency */ +#define ADF_6XXX_AE_FREQ (1000 * HZ_PER_MHZ) + enum icp_qat_gen6_slice_mask { ICP_ACCEL_GEN6_MASK_UCS_SLICE = BIT(0), ICP_ACCEL_GEN6_MASK_AUTH_SLICE = BIT(1), From 3d4df408ba9bad2b205c7fb8afc1836a6a4ca88a Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Fri, 11 Jul 2025 13:27:43 +0100 Subject: [PATCH 106/116] crypto: qat - flush misc workqueue during device shutdown Repeated loading and unloading of a device specific QAT driver, for example qat_4xxx, in a tight loop can lead to a crash due to a use-after-free scenario. This occurs when a power management (PM) interrupt triggers just before the device-specific driver (e.g., qat_4xxx.ko) is unloaded, while the core driver (intel_qat.ko) remains loaded. Since the driver uses a shared workqueue (`qat_misc_wq`) across all devices and owned by intel_qat.ko, a deferred routine from the device-specific driver may still be pending in the queue. If this routine executes after the driver is unloaded, it can dereference freed memory, resulting in a page fault and kernel crash like the following: BUG: unable to handle page fault for address: ffa000002e50a01c #PF: supervisor read access in kernel mode RIP: 0010:pm_bh_handler+0x1d2/0x250 [intel_qat] Call Trace: pm_bh_handler+0x1d2/0x250 [intel_qat] process_one_work+0x171/0x340 worker_thread+0x277/0x3a0 kthread+0xf0/0x120 ret_from_fork+0x2d/0x50 To prevent this, flush the misc workqueue during device shutdown to ensure that all pending work items are completed before the driver is unloaded. Note: This approach may slightly increase shutdown latency if the workqueue contains jobs from other devices, but it ensures correctness and stability. 
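The underlying pattern, as a minimal sketch with illustrative names (the actual change flushes adf_misc_wq from adf_dev_shutdown() before adf_dev_restore()): drain the shared workqueue before tearing down any state its work items may touch.

	/* Illustrative only: my_dev, shared_wq and free_device_state()
	 * are not from the driver.
	 */
	static void dev_shutdown(struct my_dev *dev)
	{
		flush_workqueue(shared_wq);	/* wait for pending handlers */
		free_device_state(dev);		/* safe: no handler can run now */
	}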
Fixes: e5745f34113b ("crypto: qat - enable power management for QAT GEN4") Signed-off-by: Giovanni Cabiddu Cc: stable@vger.kernel.org Reviewed-by: Ahsan Atta Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_common_drv.h | 1 + drivers/crypto/intel/qat/qat_common/adf_init.c | 1 + drivers/crypto/intel/qat/qat_common/adf_isr.c | 5 +++++ 3 files changed, 7 insertions(+) diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h index eaa6388a6678..7a022bd4ae07 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h @@ -189,6 +189,7 @@ void adf_exit_misc_wq(void); bool adf_misc_wq_queue_work(struct work_struct *work); bool adf_misc_wq_queue_delayed_work(struct delayed_work *work, unsigned long delay); +void adf_misc_wq_flush(void); #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c index f189cce7d153..46491048e0bb 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_init.c +++ b/drivers/crypto/intel/qat/qat_common/adf_init.c @@ -404,6 +404,7 @@ static void adf_dev_shutdown(struct adf_accel_dev *accel_dev) hw_data->exit_admin_comms(accel_dev); adf_cleanup_etr_data(accel_dev); + adf_misc_wq_flush(); adf_dev_restore(accel_dev); } diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c index cae1aee5479a..12e565613661 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_isr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c @@ -407,3 +407,8 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work, { return queue_delayed_work(adf_misc_wq, work, delay); } + +void adf_misc_wq_flush(void) +{ + flush_workqueue(adf_misc_wq); +} From c470ffa6f48619e8ea2442206b31b7965f8826a5 Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 11 Jul 2025 21:29:31 +0300 Subject: [PATCH 107/116] crypto: engine - remove request batching support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove request batching support from crypto_engine, as there are no drivers using this feature and it doesn't really work that well. Instead of doing batching based on backlog, a more optimal approach would be for the user to handle the batching (similar to how IPsec can hook into GSO to get 64K of data each time or how block encryption can use unit sizes much greater than 4K). 
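For callers, the only change is dropping the callback argument from crypto_engine_alloc_init_and_set(); a before/after sketch based on the call sites converted below (qlen stands in for the caller's queue length):

	/* before */
	engine = crypto_engine_alloc_init_and_set(dev, true, NULL, false, qlen);
	/* after */
	engine = crypto_engine_alloc_init_and_set(dev, true, false, qlen);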
Suggested-by: Herbert Xu Signed-off-by: Ovidiu Panait Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- arch/s390/crypto/paes_s390.c | 2 +- arch/s390/crypto/phmac_s390.c | 2 +- crypto/crypto_engine.c | 25 +--------------------- drivers/crypto/caam/jr.c | 3 +-- drivers/crypto/virtio/virtio_crypto_core.c | 2 +- include/crypto/engine.h | 1 - include/crypto/internal/engine.h | 4 ---- 7 files changed, 5 insertions(+), 34 deletions(-) diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 8a340c16acb4..a624a43a2b54 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -1633,7 +1633,7 @@ static int __init paes_s390_init(void) /* with this pseudo devie alloc and start a crypto engine */ paes_crypto_engine = crypto_engine_alloc_init_and_set(paes_dev.this_device, - true, NULL, false, MAX_QLEN); + true, false, MAX_QLEN); if (!paes_crypto_engine) { rc = -ENOMEM; goto out_err; diff --git a/arch/s390/crypto/phmac_s390.c b/arch/s390/crypto/phmac_s390.c index 90602f72108f..7ecfdc4fba2d 100644 --- a/arch/s390/crypto/phmac_s390.c +++ b/arch/s390/crypto/phmac_s390.c @@ -1006,7 +1006,7 @@ static int __init s390_phmac_init(void) /* with this pseudo device alloc and start a crypto engine */ phmac_crypto_engine = crypto_engine_alloc_init_and_set(phmac_dev.this_device, - true, NULL, false, MAX_QLEN); + true, false, MAX_QLEN); if (!phmac_crypto_engine) { rc = -ENOMEM; goto out_err; diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index 445d3c113ee1..8a2400f240d4 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -195,17 +195,6 @@ static void crypto_pump_requests(struct crypto_engine *engine, out: spin_unlock_irqrestore(&engine->queue_lock, flags); - /* - * Batch requests is possible only if - * hardware can enqueue multiple requests - */ - if (engine->do_batch_requests) { - ret = engine->do_batch_requests(engine); - if (ret) - dev_err(engine->dev, "failed to do batch requests: %d\n", - ret); - } - return; } @@ -462,12 +451,6 @@ EXPORT_SYMBOL_GPL(crypto_engine_stop); * crypto-engine queue. * @dev: the device attached with one hardware engine * @retry_support: whether hardware has support for retry mechanism - * @cbk_do_batch: pointer to a callback function to be invoked when executing - * a batch of requests. - * This has the form: - * callback(struct crypto_engine *engine) - * where: - * engine: the crypto engine structure. * @rt: whether this queue is set to run as a realtime task * @qlen: maximum size of the crypto-engine queue * @@ -476,7 +459,6 @@ EXPORT_SYMBOL_GPL(crypto_engine_stop); */ struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, bool retry_support, - int (*cbk_do_batch)(struct crypto_engine *engine), bool rt, int qlen) { struct crypto_engine *engine; @@ -495,11 +477,6 @@ struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, engine->idling = false; engine->retry_support = retry_support; engine->priv_data = dev; - /* - * Batch requests is possible only if - * hardware has support for retry mechanism. - */ - engine->do_batch_requests = retry_support ? 
cbk_do_batch : NULL; snprintf(engine->name, sizeof(engine->name), "%s-engine", dev_name(dev)); @@ -534,7 +511,7 @@ EXPORT_SYMBOL_GPL(crypto_engine_alloc_init_and_set); */ struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt) { - return crypto_engine_alloc_init_and_set(dev, false, NULL, rt, + return crypto_engine_alloc_init_and_set(dev, false, rt, CRYPTO_ENGINE_MAX_QLEN); } EXPORT_SYMBOL_GPL(crypto_engine_alloc_init); diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 9fcdb64084ac..0ef00df9730e 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -629,8 +629,7 @@ static int caam_jr_probe(struct platform_device *pdev) } /* Initialize crypto engine */ - jrpriv->engine = crypto_engine_alloc_init_and_set(jrdev, true, NULL, - false, + jrpriv->engine = crypto_engine_alloc_init_and_set(jrdev, true, false, CRYPTO_ENGINE_MAX_QLEN); if (!jrpriv->engine) { dev_err(jrdev, "Could not init crypto-engine\n"); diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 0d522049f595..3d241446099c 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -139,7 +139,7 @@ static int virtcrypto_find_vqs(struct virtio_crypto *vi) spin_lock_init(&vi->data_vq[i].lock); vi->data_vq[i].vq = vqs[i]; /* Initialize crypto engine */ - vi->data_vq[i].engine = crypto_engine_alloc_init_and_set(dev, true, NULL, true, + vi->data_vq[i].engine = crypto_engine_alloc_init_and_set(dev, true, true, virtqueue_get_vring_size(vqs[i])); if (!vi->data_vq[i].engine) { ret = -ENOMEM; diff --git a/include/crypto/engine.h b/include/crypto/engine.h index 545dbefe3e13..2e60344437da 100644 --- a/include/crypto/engine.h +++ b/include/crypto/engine.h @@ -76,7 +76,6 @@ int crypto_engine_stop(struct crypto_engine *engine); struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt); struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, bool retry_support, - int (*cbk_do_batch)(struct crypto_engine *engine), bool rt, int qlen); void crypto_engine_exit(struct crypto_engine *engine); diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h index b6a4ea2240fc..8da1a13619c9 100644 --- a/include/crypto/internal/engine.h +++ b/include/crypto/internal/engine.h @@ -37,8 +37,6 @@ struct device; * @unprepare_crypt_hardware: there are currently no more requests on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call - * @do_batch_requests: execute a batch of requests. Depends on multiple - * requests support. * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data @@ -60,8 +58,6 @@ struct crypto_engine { int (*prepare_crypt_hardware)(struct crypto_engine *engine); int (*unprepare_crypt_hardware)(struct crypto_engine *engine); - int (*do_batch_requests)(struct crypto_engine *engine); - struct kthread_worker *kworker; struct kthread_work pump_requests; From 5eb32430df783e212ffed8d35cc494a8941cda0a Mon Sep 17 00:00:00 2001 From: Ovidiu Panait Date: Fri, 11 Jul 2025 21:29:32 +0300 Subject: [PATCH 108/116] crypto: engine - remove {prepare,unprepare}_crypt_hardware callbacks The {prepare,unprepare}_crypt_hardware callbacks were added back in 2016 by commit 735d37b5424b ("crypto: engine - Introduce the block request crypto engine framework"), but they were never implemented by any driver. 
Remove them as they are unused. Since the 'engine->idling' and 'was_busy' flags are no longer needed, remove them as well. Signed-off-by: Ovidiu Panait Signed-off-by: Herbert Xu --- Documentation/crypto/crypto_engine.rst | 6 ------ crypto/crypto_engine.c | 30 +------------------------- include/crypto/internal/engine.h | 11 ---------- 3 files changed, 1 insertion(+), 46 deletions(-) diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst index d562ea17d994..7ef850e28016 100644 --- a/Documentation/crypto/crypto_engine.rst +++ b/Documentation/crypto/crypto_engine.rst @@ -36,12 +36,6 @@ engine using ``crypto_engine_stop()`` and destroy the engine with Before transferring any request, you have to fill the context enginectx by providing functions for the following: -* ``prepare_crypt_hardware``: Called once before any prepare functions are - called. - -* ``unprepare_crypt_hardware``: Called once after all unprepare functions have - been called. - * ``prepare_cipher_request``/``prepare_hash_request``: Called before each corresponding request is performed. If some processing or other preparatory work is required, do it here. diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c index 8a2400f240d4..18e1689efe12 100644 --- a/crypto/crypto_engine.c +++ b/crypto/crypto_engine.c @@ -74,7 +74,6 @@ static void crypto_pump_requests(struct crypto_engine *engine, struct crypto_engine_alg *alg; struct crypto_engine_op *op; unsigned long flags; - bool was_busy = false; int ret; spin_lock_irqsave(&engine->queue_lock, flags); @@ -83,12 +82,6 @@ static void crypto_pump_requests(struct crypto_engine *engine, if (!engine->retry_support && engine->cur_req) goto out; - /* If another context is idling then defer */ - if (engine->idling) { - kthread_queue_work(engine->kworker, &engine->pump_requests); - goto out; - } - /* Check if the engine queue is idle */ if (!crypto_queue_len(&engine->queue) || !engine->running) { if (!engine->busy) @@ -102,15 +95,6 @@ static void crypto_pump_requests(struct crypto_engine *engine, } engine->busy = false; - engine->idling = true; - spin_unlock_irqrestore(&engine->queue_lock, flags); - - if (engine->unprepare_crypt_hardware && - engine->unprepare_crypt_hardware(engine)) - dev_err(engine->dev, "failed to unprepare crypt hardware\n"); - - spin_lock_irqsave(&engine->queue_lock, flags); - engine->idling = false; goto out; } @@ -129,22 +113,11 @@ static void crypto_pump_requests(struct crypto_engine *engine, if (!engine->retry_support) engine->cur_req = async_req; - if (engine->busy) - was_busy = true; - else + if (!engine->busy) engine->busy = true; spin_unlock_irqrestore(&engine->queue_lock, flags); - /* Until here we get the request need to be encrypted successfully */ - if (!was_busy && engine->prepare_crypt_hardware) { - ret = engine->prepare_crypt_hardware(engine); - if (ret) { - dev_err(engine->dev, "failed to prepare crypt hardware\n"); - goto req_err_1; - } - } - alg = container_of(async_req->tfm->__crt_alg, struct crypto_engine_alg, base); op = &alg->op; @@ -474,7 +447,6 @@ struct crypto_engine *crypto_engine_alloc_init_and_set(struct device *dev, engine->rt = rt; engine->running = false; engine->busy = false; - engine->idling = false; engine->retry_support = retry_support; engine->priv_data = dev; diff --git a/include/crypto/internal/engine.h b/include/crypto/internal/engine.h index 8da1a13619c9..f19ef376833f 100644 --- a/include/crypto/internal/engine.h +++ b/include/crypto/internal/engine.h @@ -21,7 +21,6 @@ struct device; /* * 
struct crypto_engine - crypto hardware engine * @name: the engine name - * @idling: the engine is entering idle state * @busy: request pump is busy * @running: the engine is on working * @retry_support: indication that the hardware allows re-execution @@ -31,12 +30,6 @@ struct device; * @list: link with the global crypto engine list * @queue_lock: spinlock to synchronise access to request queue * @queue: the crypto queue of the engine - * @prepare_crypt_hardware: a request will soon arrive from the queue - * so the subsystem requests the driver to prepare the hardware - * by issuing this call - * @unprepare_crypt_hardware: there are currently no more requests on the - * queue so the subsystem notifies the driver that it may relax the - * hardware by issuing this call * @kworker: kthread worker struct for request pump * @pump_requests: work struct for scheduling work to the request pump * @priv_data: the engine private data @@ -44,7 +37,6 @@ struct device; */ struct crypto_engine { char name[ENGINE_NAME_LEN]; - bool idling; bool busy; bool running; @@ -56,9 +48,6 @@ struct crypto_engine { struct crypto_queue queue; struct device *dev; - int (*prepare_crypt_hardware)(struct crypto_engine *engine); - int (*unprepare_crypt_hardware)(struct crypto_engine *engine); - struct kthread_worker *kworker; struct kthread_work pump_requests; From 4fec76bcc5357fb10f2f69c1bf1e163db93616f1 Mon Sep 17 00:00:00 2001 From: Ruben Wauters Date: Sun, 13 Jul 2025 14:13:50 +0100 Subject: [PATCH 109/116] crypto: jitter - replace ARRAY_SIZE definition with header include The ARRAY_SIZE macro is already defined in linux/array_size.h This patch replaces the ARRAY_SIZE definition in jitterentropy.c with an include, to make the code cleaner, and help reduce the number of duplicate ARRAY_SIZE definitions in the codebase. Signed-off-by: Ruben Wauters Signed-off-by: Herbert Xu --- crypto/jitterentropy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 3b390bd6c119..3f93cdc9a7af 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -145,6 +145,7 @@ struct rand_data { */ #define JENT_ENTROPY_SAFETY_FACTOR 64 +#include #include #include #include "jitterentropy.h" @@ -178,7 +179,6 @@ static const unsigned int jent_apt_cutoff_lookup[15] = { static const unsigned int jent_apt_cutoff_permanent_lookup[15] = { 355, 447, 479, 494, 502, 507, 510, 512, 512, 512, 512, 512, 512, 512, 512 }; -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) static void jent_apt_init(struct rand_data *ec, unsigned int osr) { From d41d75fe1b751ee6b347bf1cb1cfe9accc4fcb12 Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 14 Jul 2025 08:07:49 +0100 Subject: [PATCH 110/116] crypto: qat - fix DMA direction for compression on GEN2 devices QAT devices perform an additional integrity check during compression by decompressing the output. Starting from QAT GEN4, this verification is done in-line by the hardware. However, on GEN2 devices, the hardware reads back the compressed output from the destination buffer and performs a decompression operation using it as the source. In the current QAT driver, destination buffers are always marked as write-only. This is incorrect for QAT GEN2 compression, where the buffer is also read during verification. 
Since commit 6f5dc7658094 ("iommu/vt-d: Restore WO permissions on second-level paging entries"), merged in v6.16-rc1, write-only permissions are strictly enforced, leading to DMAR errors when using QAT GEN2 devices for compression, if VT-d is enabled. Mark the destination buffers as DMA_BIDIRECTIONAL. This ensures compatibility with GEN2 devices, even though it is not required for QAT GEN4 and later. Signed-off-by: Giovanni Cabiddu Fixes: cf5bb835b7c8 ("crypto: qat - fix DMA transfer direction") Reviewed-by: Ahsan Atta Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/qat_bl.c | 6 +++--- drivers/crypto/intel/qat/qat_common/qat_compression.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.c b/drivers/crypto/intel/qat/qat_common/qat_bl.c index 5e4dad4693ca..9b2338f58d97 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.c +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.c @@ -38,7 +38,7 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, for (i = 0; i < blout->num_mapped_bufs; i++) { dma_unmap_single(dev, blout->buffers[i].addr, blout->buffers[i].len, - DMA_FROM_DEVICE); + DMA_BIDIRECTIONAL); } dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); @@ -162,7 +162,7 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, } buffers[y].addr = dma_map_single(dev, sg_virt(sg) + left, sg->length - left, - DMA_FROM_DEVICE); + DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(dev, buffers[y].addr))) goto err_out; buffers[y].len = sg->length; @@ -204,7 +204,7 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (!dma_mapping_error(dev, buflout->buffers[i].addr)) dma_unmap_single(dev, buflout->buffers[i].addr, buflout->buffers[i].len, - DMA_FROM_DEVICE); + DMA_BIDIRECTIONAL); } if (!buf->sgl_dst_valid) diff --git a/drivers/crypto/intel/qat/qat_common/qat_compression.c b/drivers/crypto/intel/qat/qat_common/qat_compression.c index 0a77ca65c8d4..53a4db5507ec 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_compression.c +++ b/drivers/crypto/intel/qat/qat_common/qat_compression.c @@ -204,7 +204,7 @@ static int qat_compression_alloc_dc_data(struct adf_accel_dev *accel_dev) if (!obuff) goto err; - obuff_p = dma_map_single(dev, obuff, ovf_buff_sz, DMA_FROM_DEVICE); + obuff_p = dma_map_single(dev, obuff, ovf_buff_sz, DMA_BIDIRECTIONAL); if (unlikely(dma_mapping_error(dev, obuff_p))) goto err; @@ -232,7 +232,7 @@ static void qat_free_dc_data(struct adf_accel_dev *accel_dev) return; dma_unmap_single(dev, dc_data->ovf_buff_p, dc_data->ovf_buff_sz, - DMA_FROM_DEVICE); + DMA_BIDIRECTIONAL); kfree_sensitive(dc_data->ovf_buff); kfree(dc_data); accel_dev->dc_data = NULL; From 6908c5f4f066a0412c3d9a6f543a09fa7d87824b Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 14 Jul 2025 08:10:29 +0100 Subject: [PATCH 111/116] crypto: qat - fix seq_file position update in adf_ring_next() The `adf_ring_next()` function in the QAT debug transport interface fails to correctly update the position index when reaching the end of the ring elements. This triggers the following kernel warning when reading ring files, such as /sys/kernel/debug/qat_c6xx_/transport/bank_00/ring_00: [27725.022965] seq_file: buggy .next function adf_ring_next [intel_qat] did not update position index Ensure that the `*pos` index is incremented before returning NULL when the end of the ring is reached, satisfying the seq_file API requirements and preventing the warning.
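For reference, a minimal ->next() implementation that honours this contract, using a hypothetical iterator rather than the driver's ring structure (->start() is assumed to return &it->items[*pos] while in range):

#include <linux/seq_file.h>

struct example_iter {
	loff_t nr_items;
	int items[64];
};

static void *example_next(struct seq_file *sfile, void *v, loff_t *pos)
{
	struct example_iter *it = sfile->private;

	(*pos)++;	/* advance even on the call that ends the iteration */
	if (*pos >= it->nr_items)
		return NULL;

	return &it->items[*pos];
}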
Fixes: a672a9dc872e ("crypto: qat - Intel(R) QAT transport code") Signed-off-by: Giovanni Cabiddu Reviewed-by: Ahsan Atta Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_transport_debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c b/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c index e2dd568b87b5..621b5d3dfcef 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c @@ -31,8 +31,10 @@ static void *adf_ring_next(struct seq_file *sfile, void *v, loff_t *pos) struct adf_etr_ring_data *ring = sfile->private; if (*pos >= (ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size) / - ADF_MSG_SIZE_TO_BYTES(ring->msg_size))) + ADF_MSG_SIZE_TO_BYTES(ring->msg_size))) { + (*pos)++; return NULL; + } return ring->base_addr + (ADF_MSG_SIZE_TO_BYTES(ring->msg_size) * (*pos)++); From 0fab5ee0d477b3bfe0ff72a78d151a49f72558fa Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Mon, 14 Jul 2025 08:10:30 +0100 Subject: [PATCH 112/116] crypto: qat - refactor ring-related debug functions Refactor the functions `adf_ring_start()` and `adf_ring_next()` to improve readability. This does not introduce any functional change. Signed-off-by: Giovanni Cabiddu Reviewed-by: Ahsan Atta Signed-off-by: Herbert Xu --- .../qat/qat_common/adf_transport_debug.c | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c b/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c index 621b5d3dfcef..6c22bc9b28e4 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c +++ b/drivers/crypto/intel/qat/qat_common/adf_transport_debug.c @@ -10,16 +10,21 @@ static DEFINE_MUTEX(ring_read_lock); static DEFINE_MUTEX(bank_read_lock); +#define ADF_RING_NUM_MSGS(ring) \ + (ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size) / \ + ADF_MSG_SIZE_TO_BYTES(ring->msg_size)) + static void *adf_ring_start(struct seq_file *sfile, loff_t *pos) { struct adf_etr_ring_data *ring = sfile->private; + unsigned int num_msg = ADF_RING_NUM_MSGS(ring); + loff_t val = *pos; mutex_lock(&ring_read_lock); - if (*pos == 0) + if (val == 0) return SEQ_START_TOKEN; - if (*pos >= (ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size) / - ADF_MSG_SIZE_TO_BYTES(ring->msg_size))) + if (val >= num_msg) return NULL; return ring->base_addr + @@ -29,15 +34,15 @@ static void *adf_ring_start(struct seq_file *sfile, loff_t *pos) static void *adf_ring_next(struct seq_file *sfile, void *v, loff_t *pos) { struct adf_etr_ring_data *ring = sfile->private; + unsigned int num_msg = ADF_RING_NUM_MSGS(ring); + loff_t val = *pos; - if (*pos >= (ADF_SIZE_TO_RING_SIZE_IN_BYTES(ring->ring_size) / - ADF_MSG_SIZE_TO_BYTES(ring->msg_size))) { - (*pos)++; + (*pos)++; + + if (val >= num_msg) return NULL; - } - return ring->base_addr + - (ADF_MSG_SIZE_TO_BYTES(ring->msg_size) * (*pos)++); + return ring->base_addr + (ADF_MSG_SIZE_TO_BYTES(ring->msg_size) * val); } static int adf_ring_show(struct seq_file *sfile, void *v) From a71475582ada92ba021852bf3c2b40ab3718549b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Jul 2025 16:59:12 +0200 Subject: [PATCH 113/116] crypto: ccp - reduce stack usage in ccp_run_aes_gcm_cmd A number of functions in this file have large structures on the stack, ccp_run_aes_gcm_cmd() being the worst, in particular when KASAN is enabled on gcc: drivers/crypto/ccp/ccp-ops.c: In function 
'ccp_run_sha_cmd': drivers/crypto/ccp/ccp-ops.c:1833:1: error: the frame size of 1136 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] drivers/crypto/ccp/ccp-ops.c: In function 'ccp_run_aes_gcm_cmd': drivers/crypto/ccp/ccp-ops.c:914:1: error: the frame size of 1632 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Avoid the issue by using dynamic memory allocation for the worst offender, ccp_run_aes_gcm_cmd(). The workarea is released automatically on every return path through the scope-based __free(kfree) annotation. Signed-off-by: Arnd Bergmann Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-ops.c | 163 ++++++++++++++++++----------------- 1 file changed, 86 insertions(+), 77 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 109b5aef4034..d78865d9d5f0 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -633,10 +633,16 @@ static noinline_for_stack int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; - struct ccp_dm_workarea key, ctx, final_wa, tag; - struct ccp_data src, dst; - struct ccp_data aad; - struct ccp_op op; + struct { + struct ccp_dm_workarea key; + struct ccp_dm_workarea ctx; + struct ccp_dm_workarea final; + struct ccp_dm_workarea tag; + struct ccp_data src; + struct ccp_data dst; + struct ccp_data aad; + struct ccp_op op; + } *wa __free(kfree) = kzalloc(sizeof(*wa), GFP_KERNEL); unsigned int dm_offset; unsigned int authsize; unsigned int jobid; @@ -650,6 +656,9 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) struct scatterlist *p_outp, sg_outp[2]; struct scatterlist *p_aad; + if (!wa) + return -ENOMEM; + if (!aes->iv) return -EINVAL; @@ -696,26 +705,26 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) jobid = CCP_NEW_JOBID(cmd_q->ccp); - memset(&op, 0, sizeof(op)); - op.cmd_q = cmd_q; - op.jobid = jobid; - op.sb_key = cmd_q->sb_key; /* Pre-allocated */ - op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ - op.init = 1; - op.u.aes.type = aes->type; + memset(&wa->op, 0, sizeof(wa->op)); + wa->op.cmd_q = cmd_q; + wa->op.jobid = jobid; + wa->op.sb_key = cmd_q->sb_key; /* Pre-allocated */ + wa->op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ + wa->op.init = 1; + wa->op.u.aes.type = aes->type; /* Copy the key to the LSB */ - ret = ccp_init_dm_workarea(&key, cmd_q, + ret = ccp_init_dm_workarea(&wa->key, cmd_q, CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, DMA_TO_DEVICE); if (ret) return ret; dm_offset = CCP_SB_BYTES - aes->key_len; - ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); + ret = ccp_set_dm_area(&wa->key, dm_offset, aes->key, 0, aes->key_len); if (ret) goto e_key; - ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + ret = ccp_copy_to_sb(cmd_q, &wa->key, wa->op.jobid, wa->op.sb_key, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; @@ -726,58 +735,58 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) * There is an assumption here that the IV is 96 bits in length, plus * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
*/ - ret = ccp_init_dm_workarea(&ctx, cmd_q, + ret = ccp_init_dm_workarea(&wa->ctx, cmd_q, CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, DMA_BIDIRECTIONAL); if (ret) goto e_key; dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len; - ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + ret = ccp_set_dm_area(&wa->ctx, dm_offset, aes->iv, 0, aes->iv_len); if (ret) goto e_ctx; - ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + ret = ccp_copy_to_sb(cmd_q, &wa->ctx, wa->op.jobid, wa->op.sb_ctx, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; } - op.init = 1; + wa->op.init = 1; if (aes->aad_len > 0) { /* Step 1: Run a GHASH over the Additional Authenticated Data */ - ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len, + ret = ccp_init_data(&wa->aad, cmd_q, p_aad, aes->aad_len, AES_BLOCK_SIZE, DMA_TO_DEVICE); if (ret) goto e_ctx; - op.u.aes.mode = CCP_AES_MODE_GHASH; - op.u.aes.action = CCP_AES_GHASHAAD; + wa->op.u.aes.mode = CCP_AES_MODE_GHASH; + wa->op.u.aes.action = CCP_AES_GHASHAAD; - while (aad.sg_wa.bytes_left) { - ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true); + while (wa->aad.sg_wa.bytes_left) { + ccp_prepare_data(&wa->aad, NULL, &wa->op, AES_BLOCK_SIZE, true); - ret = cmd_q->ccp->vdata->perform->aes(&op); + ret = cmd_q->ccp->vdata->perform->aes(&wa->op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_aad; } - ccp_process_data(&aad, NULL, &op); - op.init = 0; + ccp_process_data(&wa->aad, NULL, &wa->op); + wa->op.init = 0; } } - op.u.aes.mode = CCP_AES_MODE_GCTR; - op.u.aes.action = aes->action; + wa->op.u.aes.mode = CCP_AES_MODE_GCTR; + wa->op.u.aes.action = aes->action; if (ilen > 0) { /* Step 2: Run a GCTR over the plaintext */ in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false; - ret = ccp_init_data(&src, cmd_q, p_inp, ilen, + ret = ccp_init_data(&wa->src, cmd_q, p_inp, ilen, AES_BLOCK_SIZE, in_place ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE); @@ -785,52 +794,52 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) goto e_aad; if (in_place) { - dst = src; + wa->dst = wa->src; } else { - ret = ccp_init_data(&dst, cmd_q, p_outp, ilen, + ret = ccp_init_data(&wa->dst, cmd_q, p_outp, ilen, AES_BLOCK_SIZE, DMA_FROM_DEVICE); if (ret) goto e_src; } - op.soc = 0; - op.eom = 0; - op.init = 1; - while (src.sg_wa.bytes_left) { - ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); - if (!src.sg_wa.bytes_left) { + wa->op.soc = 0; + wa->op.eom = 0; + wa->op.init = 1; + while (wa->src.sg_wa.bytes_left) { + ccp_prepare_data(&wa->src, &wa->dst, &wa->op, AES_BLOCK_SIZE, true); + if (!wa->src.sg_wa.bytes_left) { unsigned int nbytes = ilen % AES_BLOCK_SIZE; if (nbytes) { - op.eom = 1; - op.u.aes.size = (nbytes * 8) - 1; + wa->op.eom = 1; + wa->op.u.aes.size = (nbytes * 8) - 1; } } - ret = cmd_q->ccp->vdata->perform->aes(&op); + ret = cmd_q->ccp->vdata->perform->aes(&wa->op); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; } - ccp_process_data(&src, &dst, &op); - op.init = 0; + ccp_process_data(&wa->src, &wa->dst, &wa->op); + wa->op.init = 0; } } /* Step 3: Update the IV portion of the context with the original IV */ - ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + ret = ccp_copy_from_sb(cmd_q, &wa->ctx, wa->op.jobid, wa->op.sb_ctx, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_dst; } - ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + ret = ccp_set_dm_area(&wa->ctx, dm_offset, aes->iv, 0, aes->iv_len); if (ret) goto e_dst; - ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + ret = ccp_copy_to_sb(cmd_q, &wa->ctx, wa->op.jobid, wa->op.sb_ctx, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; @@ -840,75 +849,75 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) /* Step 4: Concatenate the lengths of the AAD and source, and * hash that 16 byte buffer. 
*/ - ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE, + ret = ccp_init_dm_workarea(&wa->final, cmd_q, AES_BLOCK_SIZE, DMA_BIDIRECTIONAL); if (ret) goto e_dst; - final = (__be64 *)final_wa.address; + final = (__be64 *)wa->final.address; final[0] = cpu_to_be64(aes->aad_len * 8); final[1] = cpu_to_be64(ilen * 8); - memset(&op, 0, sizeof(op)); - op.cmd_q = cmd_q; - op.jobid = jobid; - op.sb_key = cmd_q->sb_key; /* Pre-allocated */ - op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ - op.init = 1; - op.u.aes.type = aes->type; - op.u.aes.mode = CCP_AES_MODE_GHASH; - op.u.aes.action = CCP_AES_GHASHFINAL; - op.src.type = CCP_MEMTYPE_SYSTEM; - op.src.u.dma.address = final_wa.dma.address; - op.src.u.dma.length = AES_BLOCK_SIZE; - op.dst.type = CCP_MEMTYPE_SYSTEM; - op.dst.u.dma.address = final_wa.dma.address; - op.dst.u.dma.length = AES_BLOCK_SIZE; - op.eom = 1; - op.u.aes.size = 0; - ret = cmd_q->ccp->vdata->perform->aes(&op); + memset(&wa->op, 0, sizeof(wa->op)); + wa->op.cmd_q = cmd_q; + wa->op.jobid = jobid; + wa->op.sb_key = cmd_q->sb_key; /* Pre-allocated */ + wa->op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ + wa->op.init = 1; + wa->op.u.aes.type = aes->type; + wa->op.u.aes.mode = CCP_AES_MODE_GHASH; + wa->op.u.aes.action = CCP_AES_GHASHFINAL; + wa->op.src.type = CCP_MEMTYPE_SYSTEM; + wa->op.src.u.dma.address = wa->final.dma.address; + wa->op.src.u.dma.length = AES_BLOCK_SIZE; + wa->op.dst.type = CCP_MEMTYPE_SYSTEM; + wa->op.dst.u.dma.address = wa->final.dma.address; + wa->op.dst.u.dma.length = AES_BLOCK_SIZE; + wa->op.eom = 1; + wa->op.u.aes.size = 0; + ret = cmd_q->ccp->vdata->perform->aes(&wa->op); if (ret) goto e_final_wa; if (aes->action == CCP_AES_ACTION_ENCRYPT) { /* Put the ciphered tag after the ciphertext. */ - ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize); + ccp_get_dm_area(&wa->final, 0, p_tag, 0, authsize); } else { /* Does this ciphered tag match the input? */ - ret = ccp_init_dm_workarea(&tag, cmd_q, authsize, + ret = ccp_init_dm_workarea(&wa->tag, cmd_q, authsize, DMA_BIDIRECTIONAL); if (ret) goto e_final_wa; - ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize); + ret = ccp_set_dm_area(&wa->tag, 0, p_tag, 0, authsize); if (ret) { - ccp_dm_free(&tag); + ccp_dm_free(&wa->tag); goto e_final_wa; } - ret = crypto_memneq(tag.address, final_wa.address, + ret = crypto_memneq(wa->tag.address, wa->final.address, authsize) ? -EBADMSG : 0; - ccp_dm_free(&tag); + ccp_dm_free(&wa->tag); } e_final_wa: - ccp_dm_free(&final_wa); + ccp_dm_free(&wa->final); e_dst: if (ilen > 0 && !in_place) - ccp_free_data(&dst, cmd_q); + ccp_free_data(&wa->dst, cmd_q); e_src: if (ilen > 0) - ccp_free_data(&src, cmd_q); + ccp_free_data(&wa->src, cmd_q); e_aad: if (aes->aad_len) - ccp_free_data(&aad, cmd_q); + ccp_free_data(&wa->aad, cmd_q); e_ctx: - ccp_dm_free(&ctx); + ccp_dm_free(&wa->ctx); e_key: - ccp_dm_free(&key); + ccp_dm_free(&wa->key); return ret; } From 301eee1c52d4b8f4d4d995feb932dae742e92bda Mon Sep 17 00:00:00 2001 From: Giovanni Cabiddu Date: Thu, 17 Jul 2025 11:05:43 +0100 Subject: [PATCH 114/116] crypto: qat - make adf_dev_autoreset() static The function adf_dev_autoreset() is only used within adf_aer.c and does not need to be exposed outside the compilation unit. Make it static and remove it from the header adf_common_drv.h. This does not introduce any functional change. 
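To illustrate the pattern with hypothetical names (not the QAT code): the prototype disappears from the shared header, and the definition in its single user gains the static qualifier:

/* shared.h: the former line 'int helper(struct widget *w);' is deleted. */

/* user.c */
struct widget { int v; };

static int helper(struct widget *w)
{
	return w->v * 2;	/* visible to this translation unit only */
}

A side benefit is that the compiler can now warn (-Wunused-function) if the function ever loses its last caller.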
Signed-off-by: Giovanni Cabiddu Reviewed-by: Ahsan Atta Signed-off-by: Herbert Xu --- drivers/crypto/intel/qat/qat_common/adf_aer.c | 2 +- drivers/crypto/intel/qat/qat_common/adf_common_drv.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c index 4cb8bd83f570..35679b21ff63 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_aer.c +++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c @@ -229,7 +229,7 @@ const struct pci_error_handlers adf_err_handler = { }; EXPORT_SYMBOL_GPL(adf_err_handler); -int adf_dev_autoreset(struct adf_accel_dev *accel_dev) +static int adf_dev_autoreset(struct adf_accel_dev *accel_dev) { if (accel_dev->autoreset_on_error) return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC); diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h index 7a022bd4ae07..6cf3a95489e8 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h @@ -86,7 +86,6 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); extern const struct pci_error_handlers adf_err_handler; void adf_reset_sbr(struct adf_accel_dev *accel_dev); void adf_reset_flr(struct adf_accel_dev *accel_dev); -int adf_dev_autoreset(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); From 982fd1a74de63c388c060e4fa6f7fbd088d6d02e Mon Sep 17 00:00:00 2001 From: Zhiqi Song Date: Fri, 18 Jul 2025 18:05:01 +0800 Subject: [PATCH 115/116] crypto: hisilicon/hpre - fix dma unmap sequence Perform DMA unmapping operations before processing data. Otherwise, the CPU may access unsynchronized data when SWIOTLB is enabled, since bounce buffer contents are not copied back to the original buffer until the DMA address is unmapped or explicitly synced.
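For illustration, a minimal sketch with hypothetical names (not the hpre code); the point is that dma_unmap_single() is what copies a SWIOTLB bounce buffer back to the original memory, so it must run before the CPU reads the result:

#include <linux/dma-mapping.h>

static u32 example_complete(struct device *dev, dma_addr_t daddr,
			    const u8 *result, size_t len)
{
	u32 sum = 0;
	size_t i;

	/* Unmap first: with SWIOTLB this copies the bounce buffer back. */
	dma_unmap_single(dev, daddr, len, DMA_FROM_DEVICE);

	for (i = 0; i < len; i++)	/* the CPU may now safely read @result */
		sum += result[i];

	return sum;
}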
Signed-off-by: Zhiqi Song Signed-off-by: Chenghai Huang Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_crypto.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 61b5e1c5d019..1550c3818383 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -1491,11 +1491,13 @@ static void hpre_ecdh_cb(struct hpre_ctx *ctx, void *resp) if (overtime_thrhld && hpre_is_bd_timeout(req, overtime_thrhld)) atomic64_inc(&dfx[HPRE_OVER_THRHLD_CNT].value); + /* Do unmap before data processing */ + hpre_ecdh_hw_data_clr_all(ctx, req, areq->dst, areq->src); + p = sg_virt(areq->dst); memmove(p, p + ctx->key_sz - curve_sz, curve_sz); memmove(p + curve_sz, p + areq->dst_len - curve_sz, curve_sz); - hpre_ecdh_hw_data_clr_all(ctx, req, areq->dst, areq->src); kpp_request_complete(areq, ret); atomic64_inc(&dfx[HPRE_RECV_CNT].value); @@ -1808,9 +1810,11 @@ static void hpre_curve25519_cb(struct hpre_ctx *ctx, void *resp) if (overtime_thrhld && hpre_is_bd_timeout(req, overtime_thrhld)) atomic64_inc(&dfx[HPRE_OVER_THRHLD_CNT].value); + /* Do unmap before data processing */ + hpre_curve25519_hw_data_clr_all(ctx, req, areq->dst, areq->src); + hpre_key_to_big_end(sg_virt(areq->dst), CURVE25519_KEY_SIZE); - hpre_curve25519_hw_data_clr_all(ctx, req, areq->dst, areq->src); kpp_request_complete(areq, ret); atomic64_inc(&dfx[HPRE_RECV_CNT].value); From bf24d64268544379d9a9b5b8efc2bb03967703b3 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 20 Jul 2025 20:26:05 +0200 Subject: [PATCH 116/116] crypto: keembay - Use min() to simplify ocs_create_linked_list_from_sg() Use min() to simplify ocs_create_linked_list_from_sg() and improve its readability. Signed-off-by: Thorsten Blum Signed-off-by: Herbert Xu --- drivers/crypto/intel/keembay/ocs-aes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/intel/keembay/ocs-aes.c b/drivers/crypto/intel/keembay/ocs-aes.c index be9f32fc8f42..bb6f33f6b4d3 100644 --- a/drivers/crypto/intel/keembay/ocs-aes.c +++ b/drivers/crypto/intel/keembay/ocs-aes.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -1473,8 +1474,7 @@ int ocs_create_linked_list_from_sg(const struct ocs_aes_dev *aes_dev, ll = dll_desc->vaddr; for (i = 0; i < dma_nents; i++, sg = sg_next(sg)) { ll[i].src_addr = sg_dma_address(sg) + data_offset; - ll[i].src_len = (sg_dma_len(sg) - data_offset) < data_size ? - (sg_dma_len(sg) - data_offset) : data_size; + ll[i].src_len = min(sg_dma_len(sg) - data_offset, data_size); data_offset = 0; data_size -= ll[i].src_len; /* Current element points to the DMA address of the next one. */