erofs: support DEFLATE decompression by using Intel QAT

This patch introduces the use of the Intel QAT to offload EROFS data
decompression, aiming to improve the decompression performance.

A 285MiB dataset is used with the following command to create EROFS
images with different cluster sizes:
     $ mkfs.erofs -zdeflate,level=9 -C{4096,16384,65536,131072,262144}

Fio is used to test the following read patterns:
     $ fio -filename=testfile -bs=4k -rw=read -name=job1
     $ fio -filename=testfile -bs=4k -rw=randread -name=job1
     $ fio -filename=testfile -bs=4k -rw=randread --io_size=14m -name=job1

Here are some performance numbers for reference:

Processors: Intel(R) Xeon(R) 6766E (144 cores)
Memory:     512 GiB

|-----------------------------------------------------------------------------|
|           | Cluster size | sequential read | randread  | small randread(5%) |
|-----------|--------------|-----------------|-----------|--------------------|
| Intel QAT |    4096      |    538  MiB/s   | 112 MiB/s |     20.76 MiB/s    |
| Intel QAT |    16384     |    699  MiB/s   | 158 MiB/s |     21.02 MiB/s    |
| Intel QAT |    65536     |    917  MiB/s   | 278 MiB/s |     20.90 MiB/s    |
| Intel QAT |    131072    |    1056 MiB/s   | 351 MiB/s |     23.36 MiB/s    |
| Intel QAT |    262144    |    1145 MiB/s   | 431 MiB/s |     26.66 MiB/s    |
| deflate   |    4096      |    499  MiB/s   | 108 MiB/s |     21.50 MiB/s    |
| deflate   |    16384     |    422  MiB/s   | 125 MiB/s |     18.94 MiB/s    |
| deflate   |    65536     |    452  MiB/s   | 159 MiB/s |     13.02 MiB/s    |
| deflate   |    131072    |    452  MiB/s   | 177 MiB/s |     11.44 MiB/s    |
| deflate   |    262144    |    466  MiB/s   | 194 MiB/s |     10.60 MiB/s    |

Signed-off-by: Bo Liu <liubo03@inspur.com>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Link: https://lore.kernel.org/r/20250522094931.28956-1-liubo03@inspur.com
[ Gao Xiang: refine the commit message. ]
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
This commit is contained in:
Bo Liu 2025-05-22 05:49:31 -04:00 committed by Gao Xiang
parent 17a2a72df3
commit b4a29efc51
8 changed files with 265 additions and 5 deletions

View File

@ -27,3 +27,11 @@ Description: Writing to this will drop compression-related caches,
- 1 : invalidate cached compressed folios
- 2 : drop in-memory pclusters
- 3 : drop in-memory pclusters and cached compressed folios
What: /sys/fs/erofs/accel
Date: May 2025
Contact: "Bo Liu" <liubo03@inspur.com>
Description: Used to set or show the hardware accelerators in effect;
multiple accelerators are separated by '\n'.
Supported accelerator(s): qat_deflate.
Disable all accelerators with an empty string (echo > accel).

View File

@ -144,6 +144,20 @@ config EROFS_FS_ZIP_ZSTD
If unsure, say N.
config EROFS_FS_ZIP_ACCEL
bool "EROFS hardware decompression support"
depends on EROFS_FS_ZIP
help
Saying Y here includes hardware accelerator support for reading
EROFS file systems containing compressed data. It offers faster
decompression than the software implementation and incurs lower
CPU overhead.
Hardware accelerator support is an experimental feature for now and
file systems are still readable without selecting this option.
If unsure, say N.
config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support (deprecated)"
depends on EROFS_FS

View File

@ -7,5 +7,6 @@ erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o zutil.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ZIP_ZSTD) += decompressor_zstd.o
erofs-$(CONFIG_EROFS_FS_ZIP_ACCEL) += decompressor_crypto.o
erofs-$(CONFIG_EROFS_FS_BACKED_BY_FILE) += fileio.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o

View File

@ -76,4 +76,14 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
unsigned int padbufsize);
int __init z_erofs_init_decompressor(void);
void z_erofs_exit_decompressor(void);
int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
struct page **pgpl);
int z_erofs_crypto_enable_engine(const char *name, int len);
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
void z_erofs_crypto_disable_all_engines(void);
int z_erofs_crypto_show_engines(char *buf, int size, char sep);
#else
static inline void z_erofs_crypto_disable_all_engines(void) {}
static inline int z_erofs_crypto_show_engines(char *buf, int size, char sep) { return 0; }
#endif
#endif

View File

@ -0,0 +1,181 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <linux/scatterlist.h>
#include <crypto/acompress.h>
#include "compress.h"
static int __z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
struct crypto_acomp *tfm)
{
struct sg_table st_src, st_dst;
struct acomp_req *req;
struct crypto_wait wait;
u8 *headpage;
int ret;
headpage = kmap_local_page(*rq->in);
ret = z_erofs_fixup_insize(rq, headpage + rq->pageofs_in,
min_t(unsigned int, rq->inputsize,
rq->sb->s_blocksize - rq->pageofs_in));
kunmap_local(headpage);
if (ret)
return ret;
req = acomp_request_alloc(tfm);
if (!req)
return -ENOMEM;
ret = sg_alloc_table_from_pages_segment(&st_src, rq->in, rq->inpages,
rq->pageofs_in, rq->inputsize, UINT_MAX, GFP_KERNEL);
if (ret < 0)
goto failed_src_alloc;
ret = sg_alloc_table_from_pages_segment(&st_dst, rq->out, rq->outpages,
rq->pageofs_out, rq->outputsize, UINT_MAX, GFP_KERNEL);
if (ret < 0)
goto failed_dst_alloc;
acomp_request_set_params(req, st_src.sgl,
st_dst.sgl, rq->inputsize, rq->outputsize);
crypto_init_wait(&wait);
acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
crypto_req_done, &wait);
ret = crypto_wait_req(crypto_acomp_decompress(req), &wait);
if (ret) {
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
ret, rq->inputsize, rq->pageofs_in, rq->outputsize);
ret = -EIO;
}
sg_free_table(&st_dst);
failed_dst_alloc:
sg_free_table(&st_src);
failed_src_alloc:
acomp_request_free(req);
return ret;
}
/*
 * One candidate hardware engine for a compression algorithm.
 *
 * @crypto_name: name passed to crypto_alloc_acomp(); NULL marks the end of
 *               an engine list.  It only ever points at string literals and
 *               is never written through, hence const.
 * @tfm:         allocated transform while the engine is enabled, NULL
 *               otherwise.
 */
struct z_erofs_crypto_engine {
	const char *crypto_name;
	struct crypto_acomp *tfm;
};
/*
 * Candidate hardware engines per compression algorithm, indexed by the
 * Z_EROFS_COMPRESSION_* id.  Every list ends with an entry whose
 * crypto_name is NULL; an algorithm with no engine has only that terminator.
 *
 * The table is file-local: it is mutable state that is only consistent
 * under z_erofs_crypto_rwsem.
 * NOTE(review): no user outside this file is visible in this change.
 */
static struct z_erofs_crypto_engine *z_erofs_crypto[Z_EROFS_COMPRESSION_MAX] = {
	[Z_EROFS_COMPRESSION_LZ4] = (struct z_erofs_crypto_engine[]) {
		{},
	},
	[Z_EROFS_COMPRESSION_LZMA] = (struct z_erofs_crypto_engine[]) {
		{},
	},
	[Z_EROFS_COMPRESSION_DEFLATE] = (struct z_erofs_crypto_engine[]) {
		{ .crypto_name = "qat_deflate", },
		{},
	},
	[Z_EROFS_COMPRESSION_ZSTD] = (struct z_erofs_crypto_engine[]) {
		{},
	},
};
/*
 * Guards the ->tfm pointers in z_erofs_crypto[]: enabling and disabling
 * engines take it for writing, z_erofs_crypto_decompress() holds it for
 * reading for the whole duration of a decompression.
 */
static DECLARE_RWSEM(z_erofs_crypto_rwsem);
/*
 * Return the transform of the first engine listed for algorithm @alg that
 * currently has one, or NULL when none of its engines is enabled.
 */
static struct crypto_acomp *z_erofs_crypto_get_engine(int alg)
{
	struct z_erofs_crypto_engine *eng = z_erofs_crypto[alg];

	/* each list ends with an entry whose crypto_name is NULL */
	while (eng->crypto_name) {
		if (eng->tfm)
			return eng->tfm;
		eng++;
	}
	return NULL;
}
/*
 * Try to decompress @rq with an enabled hardware engine.
 *
 * Every NULL slot in rq->out[] is filled with a page obtained from
 * __erofs_allocpage() and tagged Z_EROFS_SHORTLIVED_PAGE before the request
 * is handed to the engine.
 *
 * Returns -EOPNOTSUPP when no engine is enabled for rq->alg, -ENOMEM when
 * a page cannot be allocated, otherwise the result of
 * __z_erofs_crypto_decompress().
 */
int z_erofs_crypto_decompress(struct z_erofs_decompress_req *rq,
			      struct page **pgpl)
{
	struct crypto_acomp *tfm;
	int idx, ret = -EOPNOTSUPP;

	/* keep the chosen tfm alive until the request has completed */
	down_read(&z_erofs_crypto_rwsem);
	tfm = z_erofs_crypto_get_engine(rq->alg);
	if (!tfm)
		goto unlock;

	for (idx = 0; idx < rq->outpages; idx++) {
		struct page *bounce;

		if (rq->out[idx])
			continue;
		bounce = __erofs_allocpage(pgpl, rq->gfp, true);
		if (!bounce) {
			ret = -ENOMEM;
			goto unlock;
		}
		set_page_private(bounce, Z_EROFS_SHORTLIVED_PAGE);
		rq->out[idx] = bounce;
	}
	ret = __z_erofs_crypto_decompress(rq, tfm);
unlock:
	up_read(&z_erofs_crypto_rwsem);
	return ret;
}
/*
 * Enable the engine whose crypto name is exactly the @len bytes at @name.
 *
 * @name: engine name; it need not be NUL-terminated at @len.
 * @len:  number of bytes of @name that make up the engine name.
 *
 * The name must match a known engine in full.  A bare prefix such as "qat",
 * or an empty name, selects nothing; a plain strncmp() over @len bytes would
 * accept both, and an empty name would then enable every engine.
 *
 * Returns 0 if the engine was enabled, was already enabled, or if @name
 * matches no known engine; -EOPNOTSUPP if the transform cannot be allocated.
 */
int z_erofs_crypto_enable_engine(const char *name, int len)
{
	struct z_erofs_crypto_engine *e;
	struct crypto_acomp *tfm;
	int alg, err = 0;

	down_write(&z_erofs_crypto_rwsem);
	for (alg = 0; alg < Z_EROFS_COMPRESSION_MAX; ++alg) {
		for (e = z_erofs_crypto[alg]; e->crypto_name; ++e) {
			/* compare lengths first so a prefix can never match */
			if (strlen(e->crypto_name) != (size_t)len ||
			    strncmp(name, e->crypto_name, len))
				continue;
			if (e->tfm)	/* already enabled */
				break;
			tfm = crypto_alloc_acomp(e->crypto_name, 0, 0);
			if (IS_ERR(tfm)) {
				err = -EOPNOTSUPP;
				goto out;
			}
			e->tfm = tfm;
			break;
		}
	}
out:
	up_write(&z_erofs_crypto_rwsem);
	return err;
}
/*
 * Free the transform of every enabled engine and mark it disabled.
 * Takes z_erofs_crypto_rwsem for writing while doing so.
 */
void z_erofs_crypto_disable_all_engines(void)
{
	struct z_erofs_crypto_engine *eng;
	int id;

	down_write(&z_erofs_crypto_rwsem);
	for (id = 0; id < Z_EROFS_COMPRESSION_MAX; id++) {
		eng = z_erofs_crypto[id];
		while (eng->crypto_name) {
			if (eng->tfm) {
				crypto_free_acomp(eng->tfm);
				eng->tfm = NULL;
			}
			eng++;
		}
	}
	up_write(&z_erofs_crypto_rwsem);
}
/*
 * Write the name of every enabled engine into @buf, each followed by @sep.
 *
 * @buf:  destination buffer.
 * @size: capacity of @buf in bytes.
 * @sep:  character appended after each engine name.
 *
 * Returns the number of bytes written, as accumulated from scnprintf().
 * NOTE(review): ->tfm is read here without taking z_erofs_crypto_rwsem.
 */
int z_erofs_crypto_show_engines(char *buf, int size, char sep)
{
	struct z_erofs_crypto_engine *eng;
	int id, used = 0;

	for (id = 0; id < Z_EROFS_COMPRESSION_MAX; id++) {
		for (eng = z_erofs_crypto[id]; eng->crypto_name; eng++) {
			if (eng->tfm)
				used += scnprintf(buf + used, size - used,
						  "%s%c", eng->crypto_name,
						  sep);
		}
	}
	return used;
}

View File

@ -97,7 +97,7 @@ static int z_erofs_load_deflate_config(struct super_block *sb,
return -ENOMEM;
}
static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
static int __z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
struct page **pgpl)
{
struct super_block *sb = rq->sb;
@ -178,6 +178,22 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
return err;
}
/*
 * DEFLATE entry point: with CONFIG_EROFS_FS_ZIP_ACCEL, requests that are not
 * partial decodes are first offered to z_erofs_crypto_decompress().  Only an
 * -EOPNOTSUPP result falls through to the software decompressor; any other
 * result (success or error) is returned as is.
 */
static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
				      struct page **pgpl)
{
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
	if (!rq->partial_decoding) {
		int ret = z_erofs_crypto_decompress(rq, pgpl);

		if (ret != -EOPNOTSUPP)
			return ret;
	}
#endif
	return __z_erofs_deflate_decompress(rq, pgpl);
}
const struct z_erofs_decompressor z_erofs_deflate_decomp = {
.config = z_erofs_load_deflate_config,
.decompress = z_erofs_deflate_decompress,

View File

@ -7,12 +7,14 @@
#include <linux/kobject.h>
#include "internal.h"
#include "compress.h"
/*
 * Attribute type ids.  attr_accel is matched by the case labels in
 * erofs_attr_show() and erofs_attr_store().
 */
enum {
attr_feature,
attr_drop_caches,
attr_pointer_ui,
attr_pointer_bool,
attr_accel,	/* hardware accelerator list, see z_erofs_crypto_*() */
};
enum {
@ -60,14 +62,25 @@ static struct erofs_attr erofs_attr_##_name = { \
EROFS_ATTR_RW_UI(sync_decompress, erofs_mount_opts);
EROFS_ATTR_FUNC(drop_caches, 0200);
#endif
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
EROFS_ATTR_FUNC(accel, 0644);
#endif
static struct attribute *erofs_attrs[] = {
static struct attribute *erofs_sb_attrs[] = {
#ifdef CONFIG_EROFS_FS_ZIP
ATTR_LIST(sync_decompress),
ATTR_LIST(drop_caches),
#endif
NULL,
};
ATTRIBUTE_GROUPS(erofs_sb);
static struct attribute *erofs_attrs[] = {
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
ATTR_LIST(accel),
#endif
NULL,
};
ATTRIBUTE_GROUPS(erofs);
/* Features this copy of erofs supports */
@ -128,6 +141,8 @@ static ssize_t erofs_attr_show(struct kobject *kobj,
if (!ptr)
return 0;
return sysfs_emit(buf, "%d\n", *(bool *)ptr);
case attr_accel:
return z_erofs_crypto_show_engines(buf, PAGE_SIZE, '\n');
}
return 0;
}
@ -181,6 +196,19 @@ static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr,
if (t & 1)
invalidate_mapping_pages(MNGD_MAPPING(sbi), 0, -1);
return len;
#endif
#ifdef CONFIG_EROFS_FS_ZIP_ACCEL
case attr_accel:
buf = skip_spaces(buf);
z_erofs_crypto_disable_all_engines();
while (*buf) {
t = strcspn(buf, "\n");
ret = z_erofs_crypto_enable_engine(buf, t);
if (ret < 0)
return ret;
buf += buf[t] != '\0' ? t + 1 : t;
}
return len;
#endif
}
return 0;
@ -199,12 +227,13 @@ static const struct sysfs_ops erofs_attr_ops = {
};
static const struct kobj_type erofs_sb_ktype = {
.default_groups = erofs_groups,
.default_groups = erofs_sb_groups,
.sysfs_ops = &erofs_attr_ops,
.release = erofs_sb_release,
};
/*
 * kobj_type whose default attributes come from erofs_groups, i.e. the
 * "accel" attribute when CONFIG_EROFS_FS_ZIP_ACCEL is set.  Unlike
 * erofs_sb_ktype it sets no ->release callback.
 */
static const struct kobj_type erofs_ktype = {
.default_groups = erofs_groups,
.sysfs_ops = &erofs_attr_ops,
};

View File

@ -441,6 +441,7 @@ void z_erofs_exit_subsystem(void)
z_erofs_destroy_pcpu_workers();
destroy_workqueue(z_erofs_workqueue);
z_erofs_destroy_pcluster_pool();
z_erofs_crypto_disable_all_engines();
z_erofs_exit_decompressor();
}