crypto: scatterwalk - Fix memcpy_sglist() to always succeed

The original implementation of memcpy_sglist() was broken because it
didn't handle scatterlists that describe exactly the same memory, a
case that many callers rely on.  The current implementation is broken
too: it calls the skcipher_walk functions, which can fail, and it
ignores any errors they return.

Fix it by replacing it with a new implementation, written from
scratch, that always succeeds.  It's also a bit faster, since it
avoids the overhead of skcipher_walk, which includes a lot of
functionality (such as alignmask handling) that's irrelevant here.
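
As a minimal illustration (hypothetical caller; "buf" and "len" are
placeholder names, not taken from this patch), the case that must be
treated as a no-op and must never fail looks like:

    struct scatterlist src_sg, dst_sg;

    /* Both scatterlists describe exactly the same memory. */
    sg_init_one(&src_sg, buf, len);
    sg_init_one(&dst_sg, buf, len);

    /* Must return without copying anything and without failing. */
    memcpy_sglist(&dst_sg, &src_sg, len);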

Reported-by: Colin Ian King <coking@nvidia.com>
Closes: https://lore.kernel.org/r/20251114122620.111623-1-coking@nvidia.com
Fixes: 131bdceca1 ("crypto: scatterwalk - Add memcpy_sglist")
Fixes: 0f8d42bf12 ("crypto: scatterwalk - Move skcipher walk and use it for memcpy_sglist")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

--- a/crypto/scatterwalk.c
+++ b/crypto/scatterwalk.c
@@ -101,26 +101,97 @@ void memcpy_to_sglist(struct scatterlist *sg, unsigned int start,
 }
 EXPORT_SYMBOL_GPL(memcpy_to_sglist);
 
+/**
+ * memcpy_sglist() - Copy data from one scatterlist to another
+ * @dst: The destination scatterlist.  Can be NULL if @nbytes == 0.
+ * @src: The source scatterlist.  Can be NULL if @nbytes == 0.
+ * @nbytes: Number of bytes to copy
+ *
+ * The scatterlists can describe exactly the same memory, in which case this
+ * function is a no-op.  No other overlaps are supported.
+ *
+ * Context: Any context
+ */
 void memcpy_sglist(struct scatterlist *dst, struct scatterlist *src,
                    unsigned int nbytes)
 {
-        struct skcipher_walk walk = {};
+        unsigned int src_offset, dst_offset;
 
-        if (unlikely(nbytes == 0)) /* in case sg == NULL */
+        if (unlikely(nbytes == 0)) /* in case src and/or dst is NULL */
                 return;
 
-        walk.total = nbytes;
-
-        scatterwalk_start(&walk.in, src);
-        scatterwalk_start(&walk.out, dst);
-
-        skcipher_walk_first(&walk, true);
-        do {
-                if (walk.src.virt.addr != walk.dst.virt.addr)
-                        memcpy(walk.dst.virt.addr, walk.src.virt.addr,
-                               walk.nbytes);
-                skcipher_walk_done(&walk, 0);
-        } while (walk.nbytes);
+        src_offset = src->offset;
+        dst_offset = dst->offset;
+        for (;;) {
+                /* Compute the length to copy this step. */
+                unsigned int len = min3(src->offset + src->length - src_offset,
+                                        dst->offset + dst->length - dst_offset,
+                                        nbytes);
+                struct page *src_page = sg_page(src);
+                struct page *dst_page = sg_page(dst);
+                const void *src_virt;
+                void *dst_virt;
+
+                if (IS_ENABLED(CONFIG_HIGHMEM)) {
+                        /* HIGHMEM: we may have to actually map the pages. */
+                        const unsigned int src_oip = offset_in_page(src_offset);
+                        const unsigned int dst_oip = offset_in_page(dst_offset);
+                        const unsigned int limit = PAGE_SIZE;
+
+                        /* Further limit len to not cross a page boundary. */
+                        len = min3(len, limit - src_oip, limit - dst_oip);
+
+                        /* Compute the source and destination pages. */
+                        src_page += src_offset / PAGE_SIZE;
+                        dst_page += dst_offset / PAGE_SIZE;
+
+                        if (src_page != dst_page) {
+                                /* Copy between different pages. */
+                                memcpy_page(dst_page, dst_oip,
+                                            src_page, src_oip, len);
+                                flush_dcache_page(dst_page);
+                        } else if (src_oip != dst_oip) {
+                                /* Copy between different parts of same page. */
+                                dst_virt = kmap_local_page(dst_page);
+                                memcpy(dst_virt + dst_oip, dst_virt + src_oip,
+                                       len);
+                                kunmap_local(dst_virt);
+                                flush_dcache_page(dst_page);
+                        } /* Else, it's the same memory.  No action needed. */
+                } else {
+                        /*
+                         * !HIGHMEM: no mapping needed.  Just work in the linear
+                         * buffer of each sg entry.  Note that we can cross page
+                         * boundaries, as they are not significant in this case.
+                         */
+                        src_virt = page_address(src_page) + src_offset;
+                        dst_virt = page_address(dst_page) + dst_offset;
+                        if (src_virt != dst_virt) {
+                                memcpy(dst_virt, src_virt, len);
+                                if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+                                        __scatterwalk_flush_dcache_pages(
+                                                dst_page, dst_offset, len);
+                        } /* Else, it's the same memory.  No action needed. */
+                }
+
+                nbytes -= len;
+                if (nbytes == 0) /* No more to copy? */
+                        break;
+
+                /*
+                 * There's more to copy.  Advance the offsets by the length
+                 * copied this step, and advance the sg entries as needed.
+                 */
+                src_offset += len;
+                if (src_offset >= src->offset + src->length) {
+                        src = sg_next(src);
+                        src_offset = src->offset;
+                }
+                dst_offset += len;
+                if (dst_offset >= dst->offset + dst->length) {
+                        dst = sg_next(dst);
+                        dst_offset = dst->offset;
+                }
+        }
 }
 EXPORT_SYMBOL_GPL(memcpy_sglist);
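
A usage sketch of the new implementation (hypothetical buffers a0/a1,
b0/b1 and size BUF_LEN; not part of the patch), copying between two
distinct two-entry scatterlists:

    struct scatterlist src[2], dst[2];

    sg_init_table(src, 2);
    sg_set_buf(&src[0], a0, BUF_LEN);
    sg_set_buf(&src[1], a1, BUF_LEN);

    sg_init_table(dst, 2);
    sg_set_buf(&dst[0], b0, BUF_LEN);
    sg_set_buf(&dst[1], b1, BUF_LEN);

    /*
     * Copies step by step, splitting at sg entry boundaries (and at
     * page boundaries when CONFIG_HIGHMEM); cannot fail.
     */
    memcpy_sglist(dst, src, 2 * BUF_LEN);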

--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -227,6 +227,34 @@ static inline void scatterwalk_done_src(struct scatter_walk *walk,
         scatterwalk_advance(walk, nbytes);
 }
 
+/*
+ * Flush the dcache of any pages that overlap the region
+ * [offset, offset + nbytes) relative to base_page.
+ *
+ * This should be called only when ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE, to ensure
+ * that all relevant code (including the call to sg_page() in the caller, if
+ * applicable) gets fully optimized out when !ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE.
+ */
+static inline void __scatterwalk_flush_dcache_pages(struct page *base_page,
+                                                    unsigned int offset,
+                                                    unsigned int nbytes)
+{
+        unsigned int num_pages;
+
+        base_page += offset / PAGE_SIZE;
+        offset %= PAGE_SIZE;
+
+        /*
+         * This is an overflow-safe version of
+         * num_pages = DIV_ROUND_UP(offset + nbytes, PAGE_SIZE).
+         */
+        num_pages = nbytes / PAGE_SIZE;
+        num_pages += DIV_ROUND_UP(offset + (nbytes % PAGE_SIZE), PAGE_SIZE);
+
+        for (unsigned int i = 0; i < num_pages; i++)
+                flush_dcache_page(base_page + i);
+}
+
 /**
  * scatterwalk_done_dst() - Finish one step of a walk of destination scatterlist
  * @walk: the scatter_walk
@@ -240,27 +268,9 @@ static inline void scatterwalk_done_dst(struct scatter_walk *walk,
                           unsigned int nbytes)
 {
         scatterwalk_unmap(walk);
-        /*
-         * Explicitly check ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE instead of just
-         * relying on flush_dcache_page() being a no-op when not implemented,
-         * since otherwise the BUG_ON in sg_page() does not get optimized out.
-         * This also avoids having to consider whether the loop would get
-         * reliably optimized out or not.
-         */
-        if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE) {
-                struct page *base_page;
-                unsigned int offset;
-                int start, end, i;
-
-                base_page = sg_page(walk->sg);
-                offset = walk->offset;
-                start = offset >> PAGE_SHIFT;
-                end = start + (nbytes >> PAGE_SHIFT);
-                end += (offset_in_page(offset) + offset_in_page(nbytes) +
-                        PAGE_SIZE - 1) >> PAGE_SHIFT;
-                for (i = start; i < end; i++)
-                        flush_dcache_page(base_page + i);
-        }
+        if (ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE)
+                __scatterwalk_flush_dcache_pages(sg_page(walk->sg),
+                                                 walk->offset, nbytes);
         scatterwalk_advance(walk, nbytes);
 }
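
The split page count in __scatterwalk_flush_dcache_pages() can be
checked in isolation.  The following standalone userspace sketch (not
part of the patch; PAGE_SIZE hard-coded to 4096 and the offset/nbytes
values invented for the demonstration) shows that the naive
DIV_ROUND_UP(offset + nbytes, PAGE_SIZE) wraps around for nbytes near
UINT_MAX, while the split form matches a 64-bit reference:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096u
    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
            /* offset is already reduced modulo PAGE_SIZE, as in the helper. */
            unsigned int offset = 768;
            unsigned int nbytes = 0xFFFFFF00u;      /* close to UINT_MAX */

            /* Naive form: the 32-bit sum wraps around. */
            unsigned int naive = DIV_ROUND_UP(offset + nbytes, PAGE_SIZE);

            /* Split form used by the patch: no intermediate sum can overflow. */
            unsigned int safe = nbytes / PAGE_SIZE +
                    DIV_ROUND_UP(offset + (nbytes % PAGE_SIZE), PAGE_SIZE);

            /* 64-bit reference value. */
            uint64_t ref = ((uint64_t)offset + nbytes + PAGE_SIZE - 1) / PAGE_SIZE;

            /* Prints: naive=1 safe=1048577 ref=1048577 */
            printf("naive=%u safe=%u ref=%llu\n",
                   naive, safe, (unsigned long long)ref);
            return 0;
    }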