CRC updates for 6.15
Another set of improvements to the kernel's CRC (cyclic redundancy
check) code:
- Rework the CRC64 library functions to be directly optimized, like what
I did last cycle for the CRC32 and CRC-T10DIF library functions.
- Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
support and acceleration for crc64_be and crc64_nvme.
- Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
crc_t10dif, crc64_be, and crc64_nvme.
- Remove crc_t10dif and crc64_rocksoft from the crypto API, since they
are no longer needed there.
- Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect.
- Add kunit test cases for crc64_nvme and crc7.
- Eliminate redundant functions for calculating the Castagnoli CRC32,
settling on just crc32c().
- Remove unnecessary prompts from some of the CRC kconfig options.
- Further optimize the x86 crc32c code.
-----BEGIN PGP SIGNATURE-----
iIoEABYIADIWIQSacvsUNc7UX4ntmEPzXCl4vpKOKwUCZ+CGGhQcZWJpZ2dlcnNA
Z29vZ2xlLmNvbQAKCRDzXCl4vpKOK3wRAP4tbnzawUmlIHIF0hleoADXehUgAhMt
NZn15mGvyiuwIQEA8W9qvnLdFXZkdxhxAEvDDFjyrRauL6eGtr/GvCx4AQY=
=wmKG
-----END PGP SIGNATURE-----
Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux
Pull CRC updates from Eric Biggers:
"Another set of improvements to the kernel's CRC (cyclic redundancy
check) code:
- Rework the CRC64 library functions to be directly optimized, like
what I did last cycle for the CRC32 and CRC-T10DIF library
functions
- Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
support and acceleration for crc64_be and crc64_nvme
- Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
crc_t10dif, crc64_be, and crc64_nvme
- Remove crc_t10dif and crc64_rocksoft from the crypto API, since
they are no longer needed there
- Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect
- Add kunit test cases for crc64_nvme and crc7
- Eliminate redundant functions for calculating the Castagnoli CRC32,
settling on just crc32c()
- Remove unnecessary prompts from some of the CRC kconfig options
- Further optimize the x86 crc32c code"
* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (36 commits)
x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
lib/crc: remove unnecessary prompt for CONFIG_CRC64
lib/crc: remove unnecessary prompt for CONFIG_LIBCRC32C
lib/crc: remove unnecessary prompt for CONFIG_CRC8
lib/crc: remove unnecessary prompt for CONFIG_CRC7
lib/crc: remove unnecessary prompt for CONFIG_CRC4
lib/crc7: unexport crc7_be_syndrome_table
lib/crc_kunit.c: update comment in crc_benchmark()
lib/crc_kunit.c: add test and benchmark for crc7_be()
x86/crc32: optimize tail handling for crc32c short inputs
riscv/crc64: add Zbc optimized CRC64 functions
riscv/crc-t10dif: add Zbc optimized CRC-T10DIF function
riscv/crc32: reimplement the CRC32 functions using new template
riscv/crc: add "template" for Zbc optimized CRC functions
x86/crc: add ANNOTATE_NOENDBR to suppress objtool warnings
x86/crc32: improve crc32c_arch() code generation with clang
x86/crc64: implement crc64_be and crc64_nvme using new template
x86/crc-t10dif: implement crc_t10dif using new template
x86/crc32: implement crc32_le using new template
x86/crc: add "template" for [V]PCLMULQDQ based CRC functions
...
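For context (an aside, not part of the merge itself): after this series these checksums are plain library calls rather than crypto API transforms. The sketch below is a minimal, hedged illustration of that consolidated API; it assumes the prototypes in <linux/crc32.h>, <linux/crc64.h> and <linux/crc-t10dif.h> match what was merged here, and the function and buffer names are hypothetical. Seed values are arbitrary for illustration; each format has its own init/finalize convention.

/* Sketch only: consolidated CRC library usage after this merge. */
#include <linux/crc32.h>
#include <linux/crc64.h>
#include <linux/crc-t10dif.h>
#include <linux/printk.h>

static void crc_usage_example(const u8 *buf, size_t len)
{
	u32 c32c = crc32c(~0, buf, len);	/* Castagnoli CRC32; the single remaining crc32c() entry point */
	u64 c64b = crc64_be(0, buf, len);	/* big-endian CRC64, now [V]PCLMULQDQ/Zbc accelerated */
	u64 c64n = crc64_nvme(0, buf, len);	/* renamed from crc64_rocksoft */
	u16 t10  = crc_t10dif(buf, len);	/* no longer routed through the crypto API */

	pr_info("crc32c=%08x crc64_be=%016llx crc64_nvme=%016llx t10dif=%04x\n",
		c32c, c64b, c64n, t10);
}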
commit ee6740fd34
@@ -6147,6 +6147,7 @@ F: Documentation/staging/crc*
 F: arch/*/lib/crc*
 F: include/linux/crc*
 F: lib/crc*
+F: scripts/gen-crc-consts.py

 CREATIVE SB0540
 M: Bastien Nocera <hadess@hadess.net>
@@ -129,7 +129,6 @@ CONFIG_CRYPTO_LZO=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
 CONFIG_CRYPTO_DEV_MARVELL_CESA=y
 CONFIG_CRC_CCITT=y
-CONFIG_LIBCRC32C=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_DEBUG_BUGVERBOSE is not set
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
@ -113,7 +113,6 @@ CONFIG_NFS_FS=y
|
|||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_NLS_CODEPAGE_437=y
|
||||
CONFIG_NLS_ISO8859_1=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_DEBUG_SLAB=y
|
||||
CONFIG_DEBUG_SPINLOCK=y
|
||||
|
|
|
|||
|
|
@ -483,8 +483,6 @@ CONFIG_CRYPTO_DEV_SAHARA=y
|
|||
CONFIG_CRYPTO_DEV_MXS_DCP=y
|
||||
CONFIG_CRC_CCITT=m
|
||||
CONFIG_CRC_T10DIF=y
|
||||
CONFIG_CRC7=m
|
||||
CONFIG_LIBCRC32C=m
|
||||
CONFIG_CMA_SIZE_MBYTES=64
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_8x8=y
|
||||
|
|
|
|||
|
|
@ -148,7 +148,6 @@ CONFIG_EXT2_FS=y
|
|||
CONFIG_JFFS2_FS=y
|
||||
# CONFIG_NETWORK_FILESYSTEMS is not set
|
||||
CONFIG_CRC_ITU_T=y
|
||||
CONFIG_CRC7=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
# CONFIG_ENABLE_MUST_CHECK is not set
|
||||
# CONFIG_DEBUG_BUGVERBOSE is not set
|
||||
|
|
|
|||
|
|
@ -118,7 +118,6 @@ CONFIG_TMPFS=y
|
|||
CONFIG_CONFIGFS_FS=y
|
||||
CONFIG_JFFS2_FS=y
|
||||
CONFIG_KEYS=y
|
||||
CONFIG_CRC32_BIT=y
|
||||
CONFIG_DMA_API_DEBUG=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
|
|
|
|||
|
|
@ -290,7 +290,6 @@ CONFIG_CRYPTO_CBC=m
|
|||
CONFIG_CRYPTO_PCBC=m
|
||||
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
|
|
|
|||
|
|
@ -188,7 +188,6 @@ CONFIG_CRYPTO_CBC=m
|
|||
CONFIG_CRYPTO_PCBC=m
|
||||
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
|
|
|
|||
|
|
@ -161,7 +161,6 @@ CONFIG_NLS_ISO8859_1=y
|
|||
CONFIG_NLS_ISO8859_15=y
|
||||
CONFIG_CRYPTO_DEV_MXS_DCP=y
|
||||
CONFIG_CRC_ITU_T=m
|
||||
CONFIG_CRC7=m
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
|
|
|
|||
|
|
@ -221,7 +221,6 @@ CONFIG_CRYPTO_PCBC=y
|
|||
CONFIG_CRYPTO_DEFLATE=y
|
||||
CONFIG_CRYPTO_LZO=y
|
||||
# CONFIG_CRYPTO_ANSI_CPRNG is not set
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_8x8=y
|
||||
CONFIG_FONT_8x16=y
|
||||
|
|
|
|||
|
|
@ -710,8 +710,6 @@ CONFIG_CRYPTO_DEV_OMAP_DES=m
|
|||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC_T10DIF=y
|
||||
CONFIG_CRC_ITU_T=y
|
||||
CONFIG_CRC7=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_DMA_CMA=y
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_8x8=y
|
||||
|
|
|
|||
|
|
@ -235,7 +235,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
|
|||
CONFIG_CRYPTO_SHA512=m
|
||||
CONFIG_CRYPTO_WP512=m
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_LIBCRC32C=m
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_8x8=y
|
||||
CONFIG_FONT_8x16=y
|
||||
|
|
|
|||
|
|
@ -75,7 +75,6 @@ CONFIG_EXT3_FS=y
|
|||
# CONFIG_INOTIFY_USER is not set
|
||||
CONFIG_NLS=y
|
||||
CONFIG_CRC_ITU_T=y
|
||||
CONFIG_CRC7=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
# CONFIG_DEBUG_BUGVERBOSE is not set
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
|
|
|
|||
|
|
@ -193,7 +193,6 @@ CONFIG_PKCS7_MESSAGE_PARSER=y
|
|||
CONFIG_SYSTEM_TRUSTED_KEYRING=y
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC_ITU_T=m
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
|
|
|
|||
|
|
@@ -69,12 +69,6 @@ static void __exit crc_t10dif_arm_exit(void)
 }
 module_exit(crc_t10dif_arm_exit);

-bool crc_t10dif_is_optimized(void)
-{
-	return static_key_enabled(&have_neon);
-}
-EXPORT_SYMBOL(crc_t10dif_is_optimized);
-
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions");
 MODULE_LICENSE("GPL v2");
@@ -59,14 +59,14 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);

-static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len)
+static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len)
 {
 	if (static_branch_likely(&have_crc32))
 		return crc32c_armv8_le(crc, p, len);
-	return crc32c_le_base(crc, p, len);
+	return crc32c_base(crc, p, len);
 }

-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (len >= PMULL_MIN_LEN + 15 &&
 	    static_branch_likely(&have_pmull) && crypto_simd_usable()) {
@@ -74,7 +74,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)

 		/* align p to 16-byte boundary */
 		if (n) {
-			crc = crc32c_le_scalar(crc, p, n);
+			crc = crc32c_scalar(crc, p, n);
 			p += n;
 			len -= n;
 		}
@@ -85,9 +85,9 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 		p += n;
 		len -= n;
 	}
-	return crc32c_le_scalar(crc, p, len);
+	return crc32c_scalar(crc, p, len);
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);

 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
@@ -70,12 +70,6 @@ static void __exit crc_t10dif_arm64_exit(void)
 }
 module_exit(crc_t10dif_arm64_exit);

-bool crc_t10dif_is_optimized(void)
-{
-	return static_key_enabled(&have_asimd);
-}
-EXPORT_SYMBOL(crc_t10dif_is_optimized);
-
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions");
 MODULE_LICENSE("GPL v2");
@@ -22,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);

-u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32_le_base(crc, p, len);
@@ -43,10 +43,10 @@ u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);

-u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);

 	if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
 		kernel_neon_begin();
@@ -62,9 +62,9 @@ u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)

 	return crc32c_le_arm64(crc, p, len);
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);

-u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32_be_base(crc, p, len);
@ -75,7 +75,6 @@ CONFIG_CRYPTO_MD5=y
|
|||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC16=y
|
||||
CONFIG_CRC_T10DIF=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_FRAME_WARN=0
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_DEBUG_FS=y
|
||||
|
|
|
|||
|
|
@ -65,10 +65,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
}
|
||||
EXPORT_SYMBOL(crc32_le_arch);
|
||||
|
||||
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
if (!static_branch_likely(&have_crc32))
|
||||
return crc32c_le_base(crc, p, len);
|
||||
return crc32c_base(crc, p, len);
|
||||
|
||||
while (len >= sizeof(u64)) {
|
||||
u64 value = get_unaligned_le64(p);
|
||||
|
|
@ -100,7 +100,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
|
||||
return crc;
|
||||
}
|
||||
EXPORT_SYMBOL(crc32c_le_arch);
|
||||
EXPORT_SYMBOL(crc32c_arch);
|
||||
|
||||
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -69,7 +69,6 @@ CONFIG_USB_HCD_BCMA=y
|
|||
CONFIG_USB_HCD_SSB=y
|
||||
CONFIG_LEDS_TRIGGER_TIMER=y
|
||||
CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
|
||||
CONFIG_CRC32_SARWATE=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_DEBUG_INFO_REDUCED=y
|
||||
|
|
|
|||
|
|
@ -239,7 +239,6 @@ CONFIG_CRYPTO_TEA=m
|
|||
CONFIG_CRYPTO_TWOFISH=m
|
||||
CONFIG_CRYPTO_LZO=m
|
||||
CONFIG_CRC_T10DIF=m
|
||||
CONFIG_CRC7=m
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_DEBUG_MEMORY_INIT=y
|
||||
CONFIG_DETECT_HUNG_TASK=y
|
||||
|
|
|
|||
|
|
@ -70,4 +70,3 @@ CONFIG_NFS_FS=y
|
|||
CONFIG_NFS_V3_ACL=y
|
||||
CONFIG_NFSD=y
|
||||
CONFIG_NFSD_V3_ACL=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
|
|
|
|||
|
|
@ -216,7 +216,6 @@ CONFIG_CRYPTO_USER=y
|
|||
CONFIG_CRYPTO_CRYPTD=y
|
||||
CONFIG_CRYPTO_USER_API_HASH=y
|
||||
CONFIG_CRYPTO_USER_API_SKCIPHER=y
|
||||
CONFIG_CRC32_SLICEBY4=y
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_8x8=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
|
|
|
|||
|
|
@ -180,7 +180,6 @@ CONFIG_CRYPTO_XTS=m
|
|||
CONFIG_CRYPTO_CMAC=m
|
||||
CONFIG_CRYPTO_XCBC=m
|
||||
CONFIG_CRYPTO_CRC32=m
|
||||
CONFIG_CRYPTO_CRCT10DIF=m
|
||||
CONFIG_CRYPTO_MD4=m
|
||||
CONFIG_CRYPTO_MICHAEL_MIC=m
|
||||
CONFIG_CRYPTO_RMD160=m
|
||||
|
|
|
|||
|
|
@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m
|
|||
CONFIG_CRYPTO_CMAC=m
|
||||
CONFIG_CRYPTO_XCBC=m
|
||||
CONFIG_CRYPTO_CRC32=m
|
||||
CONFIG_CRYPTO_CRCT10DIF=m
|
||||
CONFIG_CRYPTO_MD4=m
|
||||
CONFIG_CRYPTO_MICHAEL_MIC=m
|
||||
CONFIG_CRYPTO_RMD160=m
|
||||
|
|
|
|||
|
|
@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m
|
|||
CONFIG_CRYPTO_CMAC=m
|
||||
CONFIG_CRYPTO_XCBC=m
|
||||
CONFIG_CRYPTO_CRC32=m
|
||||
CONFIG_CRYPTO_CRCT10DIF=m
|
||||
CONFIG_CRYPTO_MD4=m
|
||||
CONFIG_CRYPTO_MICHAEL_MIC=m
|
||||
CONFIG_CRYPTO_RMD160=m
|
||||
|
|
|
|||
|
|
@ -219,4 +219,3 @@ CONFIG_CRYPTO_DEFLATE=m
|
|||
CONFIG_CRYPTO_LZO=m
|
||||
# CONFIG_CRYPTO_HW is not set
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC7=m
|
||||
|
|
|
|||
|
|
@ -178,7 +178,6 @@ CONFIG_CRYPTO_TEA=y
|
|||
CONFIG_CRYPTO_TWOFISH=y
|
||||
CONFIG_CRYPTO_DEFLATE=y
|
||||
CONFIG_CRC_T10DIF=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_FONTS=y
|
||||
CONFIG_FONT_8x8=y
|
||||
CONFIG_FONT_8x16=y
|
||||
|
|
|
|||
|
|
@ -129,7 +129,6 @@ CONFIG_SQUASHFS=y
|
|||
CONFIG_SQUASHFS_XZ=y
|
||||
CONFIG_CRYPTO_ARC4=m
|
||||
CONFIG_CRC_ITU_T=m
|
||||
CONFIG_CRC32_SARWATE=y
|
||||
# CONFIG_XZ_DEC_X86 is not set
|
||||
# CONFIG_XZ_DEC_POWERPC is not set
|
||||
# CONFIG_XZ_DEC_IA64 is not set
|
||||
|
|
|
|||
|
|
@ -141,7 +141,6 @@ CONFIG_SQUASHFS=y
|
|||
CONFIG_SQUASHFS_XZ=y
|
||||
CONFIG_CRYPTO_ARC4=m
|
||||
CONFIG_CRC_ITU_T=m
|
||||
CONFIG_CRC32_SARWATE=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
CONFIG_STRIP_ASM_SYMS=y
|
||||
CONFIG_DEBUG_FS=y
|
||||
|
|
|
|||
|
|
@ -16,15 +16,6 @@
|
|||
#include <asm/mipsregs.h>
|
||||
#include <linux/unaligned.h>
|
||||
|
||||
enum crc_op_size {
|
||||
b, h, w, d,
|
||||
};
|
||||
|
||||
enum crc_type {
|
||||
crc32,
|
||||
crc32c,
|
||||
};
|
||||
|
||||
#ifndef TOOLCHAIN_SUPPORTS_CRC
|
||||
#define _ASM_SET_CRC(OP, SZ, TYPE) \
|
||||
_ASM_MACRO_3R(OP, rt, rs, rt2, \
|
||||
|
|
@ -117,10 +108,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
}
|
||||
EXPORT_SYMBOL(crc32_le_arch);
|
||||
|
||||
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
if (!static_branch_likely(&have_crc32))
|
||||
return crc32c_le_base(crc, p, len);
|
||||
return crc32c_base(crc, p, len);
|
||||
|
||||
if (IS_ENABLED(CONFIG_64BIT)) {
|
||||
for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
|
||||
|
|
@ -158,7 +149,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
}
|
||||
return crc;
|
||||
}
|
||||
EXPORT_SYMBOL(crc32c_le_arch);
|
||||
EXPORT_SYMBOL(crc32c_arch);
|
||||
|
||||
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -293,7 +293,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
|
|||
CONFIG_CRYPTO_DEFLATE=m
|
||||
# CONFIG_CRYPTO_HW is not set
|
||||
CONFIG_CRC_CCITT=m
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_PRINTK_TIME=y
|
||||
CONFIG_DEBUG_KERNEL=y
|
||||
CONFIG_STRIP_ASM_SYMS=y
|
||||
|
|
|
|||
|
|
@ -223,7 +223,6 @@ CONFIG_NLS_KOI8_U=m
|
|||
CONFIG_NLS_UTF8=y
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC_T10DIF=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_CRYPTO_CBC=y
|
||||
CONFIG_CRYPTO_MD5=y
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ CONFIG_TMPFS=y
|
|||
CONFIG_CRAMFS=y
|
||||
CONFIG_NFS_FS=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_CRC32_SLICEBY4=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_DEBUG_FS=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
|
|
|
|||
|
|
@ -47,7 +47,6 @@ CONFIG_TMPFS=y
|
|||
CONFIG_CRAMFS=y
|
||||
CONFIG_NFS_FS=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_CRC32_SLICEBY4=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_DETECT_HUNG_TASK=y
|
||||
|
|
|
|||
|
|
@ -39,4 +39,3 @@ CONFIG_CRAMFS=y
|
|||
CONFIG_NFS_FS=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC32_SLICEBY4=y
|
||||
|
|
|
|||
|
|
@ -70,7 +70,6 @@ CONFIG_NFS_FS=y
|
|||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_CRYPTO=y
|
||||
CONFIG_CRYPTO_DEV_TALITOS=y
|
||||
CONFIG_CRC32_SLICEBY4=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_DEBUG_FS=y
|
||||
|
|
|
|||
|
|
@ -281,7 +281,6 @@ CONFIG_LSM="yama,loadpin,safesetid,integrity"
|
|||
# CONFIG_CRYPTO_HW is not set
|
||||
CONFIG_CRC16=y
|
||||
CONFIG_CRC_ITU_T=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
# CONFIG_XZ_DEC_X86 is not set
|
||||
# CONFIG_XZ_DEC_IA64 is not set
|
||||
# CONFIG_XZ_DEC_ARM is not set
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ CONFIG_TMPFS=y
|
|||
CONFIG_CRAMFS=y
|
||||
CONFIG_NFS_FS=y
|
||||
CONFIG_ROOT_NFS=y
|
||||
CONFIG_CRC32_SLICEBY4=y
|
||||
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
|
||||
CONFIG_MAGIC_SYSRQ=y
|
||||
CONFIG_DETECT_HUNG_TASK=y
|
||||
|
|
|
|||
|
|
@ -78,12 +78,6 @@ static void __exit crc_t10dif_powerpc_exit(void)
|
|||
}
|
||||
module_exit(crc_t10dif_powerpc_exit);
|
||||
|
||||
bool crc_t10dif_is_optimized(void)
|
||||
{
|
||||
return static_key_enabled(&have_vec_crypto);
|
||||
}
|
||||
EXPORT_SYMBOL(crc_t10dif_is_optimized);
|
||||
|
||||
MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
|
||||
MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
|
|||
|
|
@ -23,18 +23,18 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
}
|
||||
EXPORT_SYMBOL(crc32_le_arch);
|
||||
|
||||
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
unsigned int prealign;
|
||||
unsigned int tail;
|
||||
|
||||
if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) ||
|
||||
!static_branch_likely(&have_vec_crypto) || !crypto_simd_usable())
|
||||
return crc32c_le_base(crc, p, len);
|
||||
return crc32c_base(crc, p, len);
|
||||
|
||||
if ((unsigned long)p & VMX_ALIGN_MASK) {
|
||||
prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
|
||||
crc = crc32c_le_base(crc, p, prealign);
|
||||
crc = crc32c_base(crc, p, prealign);
|
||||
len -= prealign;
|
||||
p += prealign;
|
||||
}
|
||||
|
|
@ -52,12 +52,12 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
tail = len & VMX_ALIGN_MASK;
|
||||
if (tail) {
|
||||
p += len & ~VMX_ALIGN_MASK;
|
||||
crc = crc32c_le_base(crc, p, tail);
|
||||
crc = crc32c_base(crc, p, tail);
|
||||
}
|
||||
|
||||
return crc;
|
||||
}
|
||||
EXPORT_SYMBOL(crc32c_le_arch);
|
||||
EXPORT_SYMBOL(crc32c_arch);
|
||||
|
||||
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
|
|
|
|||
|
|
@@ -25,6 +25,8 @@ config RISCV
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_BINFMT_FLAT
 	select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
+	select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC
+	select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL if MMU
 	select ARCH_HAS_DEBUG_VM_PGTABLE
@@ -16,6 +16,11 @@ lib-$(CONFIG_MMU) += uaccess.o
 lib-$(CONFIG_64BIT) += tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
 obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
+crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o
+obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o
+crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o
+obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
+crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_RISCV_ISA_V) += xor.o
 lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
@ -0,0 +1,122 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* CRC constants generated by:
|
||||
*
|
||||
* ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
|
||||
*
|
||||
* Do not edit manually.
|
||||
*/
|
||||
|
||||
struct crc_clmul_consts {
|
||||
unsigned long fold_across_2_longs_const_hi;
|
||||
unsigned long fold_across_2_longs_const_lo;
|
||||
unsigned long barrett_reduction_const_1;
|
||||
unsigned long barrett_reduction_const_2;
|
||||
};
|
||||
|
||||
/*
|
||||
* Constants generated for most-significant-bit-first CRC-16 using
|
||||
* G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
|
||||
*/
|
||||
static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
|
||||
#ifdef CONFIG_64BIT
|
||||
.fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
|
||||
.barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
|
||||
.barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
|
||||
#else
|
||||
.fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
|
||||
.barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
|
||||
.barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Constants generated for most-significant-bit-first CRC-32 using
|
||||
* G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
|
||||
* x^5 + x^4 + x^2 + x^1 + x^0
|
||||
*/
|
||||
static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
|
||||
#ifdef CONFIG_64BIT
|
||||
.fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
|
||||
.barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
|
||||
.barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
|
||||
#else
|
||||
.fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
|
||||
.barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
|
||||
.barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Constants generated for least-significant-bit-first CRC-32 using
|
||||
* G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
|
||||
* x^5 + x^4 + x^2 + x^1 + x^0
|
||||
*/
|
||||
static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
|
||||
#ifdef CONFIG_64BIT
|
||||
.fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
|
||||
.barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
|
||||
.barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
|
||||
#else
|
||||
.fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
|
||||
.fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
|
||||
.barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
|
||||
.barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Constants generated for least-significant-bit-first CRC-32 using
|
||||
* G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
|
||||
* x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
|
||||
*/
|
||||
static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
|
||||
#ifdef CONFIG_64BIT
|
||||
.fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
|
||||
.barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
|
||||
.barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
|
||||
#else
|
||||
.fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
|
||||
.fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
|
||||
.barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
|
||||
.barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Constants generated for most-significant-bit-first CRC-64 using
|
||||
* G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
|
||||
* x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
|
||||
* x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
|
||||
* x^7 + x^4 + x^1 + x^0
|
||||
*/
|
||||
#ifdef CONFIG_64BIT
|
||||
static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
|
||||
.fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
|
||||
.barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
|
||||
.barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Constants generated for least-significant-bit-first CRC-64 using
|
||||
* G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
|
||||
* x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
|
||||
* x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
|
||||
* x^4 + x^3 + x^0
|
||||
*/
|
||||
#ifdef CONFIG_64BIT
|
||||
static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
|
||||
.fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
|
||||
.fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
|
||||
.barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
|
||||
.barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
|
||||
};
|
||||
#endif
|
||||
|
|
@ -0,0 +1,265 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/* Copyright 2025 Google LLC */
|
||||
|
||||
/*
|
||||
* This file is a "template" that generates a CRC function optimized using the
|
||||
* RISC-V Zbc (scalar carryless multiplication) extension. The includer of this
|
||||
* file must define the following parameters to specify the type of CRC:
|
||||
*
|
||||
* crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
|
||||
* LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
|
||||
* mapping between bits and polynomial coefficients
|
||||
* 1 for a lsb (least-significant-bit) first CRC, i.e. reflected
|
||||
* mapping between bits and polynomial coefficients
|
||||
*/
|
||||
|
||||
#include <asm/byteorder.h>
|
||||
#include <linux/minmax.h>
|
||||
|
||||
#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */
|
||||
|
||||
static inline unsigned long clmul(unsigned long a, unsigned long b)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
asm(".option push\n"
|
||||
".option arch,+zbc\n"
|
||||
"clmul %0, %1, %2\n"
|
||||
".option pop\n"
|
||||
: "=r" (res) : "r" (a), "r" (b));
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline unsigned long clmulh(unsigned long a, unsigned long b)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
asm(".option push\n"
|
||||
".option arch,+zbc\n"
|
||||
"clmulh %0, %1, %2\n"
|
||||
".option pop\n"
|
||||
: "=r" (res) : "r" (a), "r" (b));
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline unsigned long clmulr(unsigned long a, unsigned long b)
|
||||
{
|
||||
unsigned long res;
|
||||
|
||||
asm(".option push\n"
|
||||
".option arch,+zbc\n"
|
||||
"clmulr %0, %1, %2\n"
|
||||
".option pop\n"
|
||||
: "=r" (res) : "r" (a), "r" (b));
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
|
||||
* polynomial whose bit order matches the CRC's bit order.
|
||||
*/
|
||||
#ifdef CONFIG_64BIT
|
||||
# if LSB_CRC
|
||||
# define crc_load_long(x) le64_to_cpup(x)
|
||||
# else
|
||||
# define crc_load_long(x) be64_to_cpup(x)
|
||||
# endif
|
||||
#else
|
||||
# if LSB_CRC
|
||||
# define crc_load_long(x) le32_to_cpup(x)
|
||||
# else
|
||||
# define crc_load_long(x) be32_to_cpup(x)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
|
||||
static inline unsigned long
|
||||
crc_clmul_prep(crc_t crc, unsigned long msgpoly)
|
||||
{
|
||||
#if LSB_CRC
|
||||
return msgpoly ^ crc;
|
||||
#else
|
||||
return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
|
||||
* modulo the generator polynomial G. This gives the CRC of @msgpoly.
|
||||
*/
|
||||
static inline crc_t
|
||||
crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
|
||||
{
|
||||
unsigned long tmp;
|
||||
|
||||
/*
|
||||
* First step of Barrett reduction with integrated multiplication by
|
||||
* x^n: calculate floor((msgpoly * x^n) / G). This is the value by
|
||||
* which G needs to be multiplied to cancel out the x^n and higher terms
|
||||
* of msgpoly * x^n. Do it using the following formula:
|
||||
*
|
||||
* msb-first:
|
||||
* floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
|
||||
* lsb-first:
|
||||
* floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
|
||||
*
|
||||
* barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
|
||||
* which fits a long exactly. Using any lower power of x there would
|
||||
* not carry enough precision through the calculation, while using any
|
||||
* higher power of x would require extra instructions to handle a wider
|
||||
* multiplication. In the msb-first case, using this power of x results
|
||||
* in needing a floored division by x^(BITS_PER_LONG-1), which matches
|
||||
* what clmulr produces. In the lsb-first case, a factor of x gets
|
||||
* implicitly introduced by each carryless multiplication (shown as
|
||||
* '* x' above), and the floored division instead needs to be by
|
||||
* x^BITS_PER_LONG which matches what clmul produces.
|
||||
*/
|
||||
#if LSB_CRC
|
||||
tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
|
||||
#else
|
||||
tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Second step of Barrett reduction:
|
||||
*
|
||||
* crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
|
||||
*
|
||||
* This reduces (msgpoly * x^n) modulo G by adding the appropriate
|
||||
* multiple of G to it. The result uses only the x^0..x^(n-1) terms.
|
||||
* HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
|
||||
* terms in the first place, it is more efficient to do the equivalent:
|
||||
*
|
||||
* crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
|
||||
*
|
||||
* In the lsb-first case further modify it to the following which avoids
|
||||
* a shift, as the crc ends up in the physically low n bits from clmulr:
|
||||
*
|
||||
* product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
|
||||
* crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
|
||||
*
|
||||
* barrett_reduction_const_2 contains the constant multiplier (G - x^n)
|
||||
* or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The
|
||||
* cast of the result to crc_t is essential, as it applies the mod x^n!
|
||||
*/
|
||||
#if LSB_CRC
|
||||
return clmulr(tmp, consts->barrett_reduction_const_2);
|
||||
#else
|
||||
return clmul(tmp, consts->barrett_reduction_const_2);
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Update @crc with the data from @msgpoly. */
|
||||
static inline crc_t
|
||||
crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
|
||||
}
|
||||
|
||||
/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
|
||||
static inline crc_t
|
||||
crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
unsigned long msgpoly;
|
||||
size_t i;
|
||||
|
||||
#if LSB_CRC
|
||||
msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
|
||||
for (i = 1; i < len; i++)
|
||||
msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
|
||||
#else
|
||||
msgpoly = p[0];
|
||||
for (i = 1; i < len; i++)
|
||||
msgpoly = (msgpoly << 8) ^ p[i];
|
||||
#endif
|
||||
|
||||
if (len >= sizeof(crc_t)) {
|
||||
#if LSB_CRC
|
||||
msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
|
||||
#else
|
||||
msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
|
||||
#endif
|
||||
return crc_clmul_long(msgpoly, consts);
|
||||
}
|
||||
#if LSB_CRC
|
||||
msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
|
||||
return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
|
||||
#else
|
||||
msgpoly ^= crc >> (CRC_BITS - 8*len);
|
||||
return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline crc_t
|
||||
crc_clmul(crc_t crc, const void *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
size_t align;
|
||||
|
||||
/* This implementation assumes that the CRC fits in an unsigned long. */
|
||||
BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
|
||||
|
||||
/* If the buffer is not long-aligned, align it. */
|
||||
align = (unsigned long)p % sizeof(unsigned long);
|
||||
if (align && len) {
|
||||
align = min(sizeof(unsigned long) - align, len);
|
||||
crc = crc_clmul_update_partial(crc, p, align, consts);
|
||||
p += align;
|
||||
len -= align;
|
||||
}
|
||||
|
||||
if (len >= 4 * sizeof(unsigned long)) {
|
||||
unsigned long m0, m1;
|
||||
|
||||
m0 = crc_clmul_prep(crc, crc_load_long(p));
|
||||
m1 = crc_load_long(p + sizeof(unsigned long));
|
||||
p += 2 * sizeof(unsigned long);
|
||||
len -= 2 * sizeof(unsigned long);
|
||||
/*
|
||||
* Main loop. Each iteration starts with a message polynomial
|
||||
* (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
|
||||
* more longs of data to form x^(3*BITS_PER_LONG)*m0 +
|
||||
* x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
|
||||
* "folds" that back into a congruent (modulo G) value that uses
|
||||
* just m0 and m1 again. This is done by multiplying m0 by the
|
||||
* precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
|
||||
* the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
|
||||
* adding the results to m2 and m3 as appropriate. Each such
|
||||
* multiplication produces a result twice the length of a long,
|
||||
* which in RISC-V is two instructions clmul and clmulh.
|
||||
*
|
||||
* This could be changed to fold across more than 2 longs at a
|
||||
* time if there is a CPU that can take advantage of it.
|
||||
*/
|
||||
do {
|
||||
unsigned long p0, p1, p2, p3;
|
||||
|
||||
p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
|
||||
p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
|
||||
p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
|
||||
p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
|
||||
m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
|
||||
m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
|
||||
crc_load_long(p + sizeof(unsigned long));
|
||||
|
||||
p += 2 * sizeof(unsigned long);
|
||||
len -= 2 * sizeof(unsigned long);
|
||||
} while (len >= 2 * sizeof(unsigned long));
|
||||
|
||||
crc = crc_clmul_long(m0, consts);
|
||||
crc = crc_clmul_update_long(crc, m1, consts);
|
||||
}
|
||||
|
||||
while (len >= sizeof(unsigned long)) {
|
||||
crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
|
||||
p += sizeof(unsigned long);
|
||||
len -= sizeof(unsigned long);
|
||||
}
|
||||
|
||||
if (len)
|
||||
crc = crc_clmul_update_partial(crc, p, len, consts);
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
|
@@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright 2025 Google LLC */

#ifndef _RISCV_CRC_CLMUL_H
#define _RISCV_CRC_CLMUL_H

#include <linux/types.h>
#include "crc-clmul-consts.h"

u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
		    const struct crc_clmul_consts *consts);
u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
		    const struct crc_clmul_consts *consts);
u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
		    const struct crc_clmul_consts *consts);
#ifdef CONFIG_64BIT
u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
		    const struct crc_clmul_consts *consts);
u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
		    const struct crc_clmul_consts *consts);
#endif

#endif /* _RISCV_CRC_CLMUL_H */
@@ -0,0 +1,24 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RISC-V optimized CRC-T10DIF function
 *
 * Copyright 2025 Google LLC
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <linux/crc-t10dif.h>
#include <linux/module.h>

#include "crc-clmul.h"

u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
	return crc_t10dif_generic(crc, p, len);
}
EXPORT_SYMBOL(crc_t10dif_arch);

MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function");
MODULE_LICENSE("GPL");
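Aside (not from this diff): crc_t10dif_arch() above plugs into the generic CRC library through a compile-time dispatch. Roughly, the <linux/crc-t10dif.h> wrapper looks like the sketch below; this is a simplified paraphrase under that assumption, not the exact header in this tree.

/* Simplified, assumed sketch of the library-side dispatch. */
static inline u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len)
{
	if (IS_ENABLED(CONFIG_CRC_T10DIF_ARCH))
		return crc_t10dif_arch(crc, p, len);	/* e.g. the RISC-V Zbc version above */
	return crc_t10dif_generic(crc, p, len);		/* table-based fallback */
}

static inline u16 crc_t10dif(const u8 *p, size_t len)
{
	return crc_t10dif_update(0, p, len);
}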
@ -0,0 +1,18 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* RISC-V optimized most-significant-bit-first CRC16
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*/
|
||||
|
||||
#include "crc-clmul.h"
|
||||
|
||||
typedef u16 crc_t;
|
||||
#define LSB_CRC 0
|
||||
#include "crc-clmul-template.h"
|
||||
|
||||
u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
return crc_clmul(crc, p, len, consts);
|
||||
}
|
||||
|
|
@ -1,311 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Accelerated CRC32 implementation with Zbc extension.
|
||||
*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*/
|
||||
|
||||
#include <asm/hwcap.h>
|
||||
#include <asm/alternative-macros.h>
|
||||
#include <asm/byteorder.h>
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/crc32poly.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/byteorder/generic.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/*
|
||||
* Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
|
||||
* better understanding of how this math works.
|
||||
*
|
||||
* let "+" denotes polynomial add (XOR)
|
||||
* let "-" denotes polynomial sub (XOR)
|
||||
* let "*" denotes polynomial multiplication
|
||||
* let "/" denotes polynomial floor division
|
||||
* let "S" denotes source data, XLEN bit wide
|
||||
* let "P" denotes CRC32 polynomial
|
||||
* let "T" denotes 2^(XLEN+32)
|
||||
* let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
|
||||
*
|
||||
* crc32(S, P)
|
||||
* => S * (2^32) - S * (2^32) / P * P
|
||||
* => lowest 32 bits of: S * (2^32) / P * P
|
||||
* => lowest 32 bits of: S * (2^32) * (T / P) / T * P
|
||||
* => lowest 32 bits of: S * (2^32) * quotient / T * P
|
||||
* => lowest 32 bits of: S * quotient / 2^XLEN * P
|
||||
* => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
|
||||
* => clmul_low_part(clmul_high_part(S, QT) + S, P)
|
||||
*
|
||||
* In terms of below implementations, the BE case is more intuitive, since the
|
||||
* higher order bit sits at more significant position.
|
||||
*/
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
/* Slide by XLEN bits per iteration */
|
||||
# define STEP_ORDER 3
|
||||
|
||||
/* Each below polynomial quotient has an implicit bit for 2^XLEN */
|
||||
|
||||
/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
|
||||
# define CRC32_POLY_QT_LE 0x5a72d812fb808b20
|
||||
|
||||
/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
|
||||
# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
|
||||
|
||||
/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
|
||||
* the same as the bit-reversed version of CRC32_POLY_QT_LE
|
||||
*/
|
||||
# define CRC32_POLY_QT_BE 0x04d101df481b4e5a
|
||||
|
||||
static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
|
||||
{
|
||||
return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
|
||||
}
|
||||
|
||||
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
|
||||
{
|
||||
u32 crc;
|
||||
|
||||
/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
|
||||
asm volatile (".option push\n"
|
||||
".option arch,+zbc\n"
|
||||
"clmul %0, %1, %2\n"
|
||||
"slli %0, %0, 1\n"
|
||||
"xor %0, %0, %1\n"
|
||||
"clmulr %0, %0, %3\n"
|
||||
"srli %0, %0, 32\n"
|
||||
".option pop\n"
|
||||
: "=&r" (crc)
|
||||
: "r" (s),
|
||||
"r" (poly_qt),
|
||||
"r" ((u64)poly << 32)
|
||||
:);
|
||||
return crc;
|
||||
}
|
||||
|
||||
static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
|
||||
{
|
||||
return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
|
||||
}
|
||||
|
||||
#elif __riscv_xlen == 32
|
||||
# define STEP_ORDER 2
|
||||
/* Each quotient should match the upper half of its analog in RV64 */
|
||||
# define CRC32_POLY_QT_LE 0xfb808b20
|
||||
# define CRC32C_POLY_QT_LE 0x6f5389f8
|
||||
# define CRC32_POLY_QT_BE 0x04d101df
|
||||
|
||||
static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
|
||||
{
|
||||
return crc ^ (__force u32)__cpu_to_le32(*ptr);
|
||||
}
|
||||
|
||||
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
|
||||
{
|
||||
u32 crc;
|
||||
|
||||
/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
|
||||
asm volatile (".option push\n"
|
||||
".option arch,+zbc\n"
|
||||
"clmul %0, %1, %2\n"
|
||||
"slli %0, %0, 1\n"
|
||||
"xor %0, %0, %1\n"
|
||||
"clmulr %0, %0, %3\n"
|
||||
".option pop\n"
|
||||
: "=&r" (crc)
|
||||
: "r" (s),
|
||||
"r" (poly_qt),
|
||||
"r" (poly)
|
||||
:);
|
||||
return crc;
|
||||
}
|
||||
|
||||
static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
|
||||
{
|
||||
return crc ^ (__force u32)__cpu_to_be32(*ptr);
|
||||
}
|
||||
|
||||
#else
|
||||
# error "Unexpected __riscv_xlen"
|
||||
#endif
|
||||
|
||||
static inline u32 crc32_be_zbc(unsigned long s)
|
||||
{
|
||||
u32 crc;
|
||||
|
||||
asm volatile (".option push\n"
|
||||
".option arch,+zbc\n"
|
||||
"clmulh %0, %1, %2\n"
|
||||
"xor %0, %0, %1\n"
|
||||
"clmul %0, %0, %3\n"
|
||||
".option pop\n"
|
||||
: "=&r" (crc)
|
||||
: "r" (s),
|
||||
"r" (CRC32_POLY_QT_BE),
|
||||
"r" (CRC32_POLY_BE)
|
||||
:);
|
||||
return crc;
|
||||
}
|
||||
|
||||
#define STEP (1 << STEP_ORDER)
|
||||
#define OFFSET_MASK (STEP - 1)
|
||||
|
||||
typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
|
||||
|
||||
static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
|
||||
size_t len, u32 poly,
|
||||
unsigned long poly_qt)
|
||||
{
|
||||
size_t bits = len * 8;
|
||||
unsigned long s = 0;
|
||||
u32 crc_low = 0;
|
||||
|
||||
for (int i = 0; i < len; i++)
|
||||
s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
|
||||
|
||||
s ^= (unsigned long)crc << (__riscv_xlen - bits);
|
||||
if (__riscv_xlen == 32 || len < sizeof(u32))
|
||||
crc_low = crc >> bits;
|
||||
|
||||
crc = crc32_le_zbc(s, poly, poly_qt);
|
||||
crc ^= crc_low;
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
|
||||
size_t len, u32 poly,
|
||||
unsigned long poly_qt,
|
||||
fallback crc_fb)
|
||||
{
|
||||
size_t offset, head_len, tail_len;
|
||||
unsigned long const *p_ul;
|
||||
unsigned long s;
|
||||
|
||||
asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
|
||||
RISCV_ISA_EXT_ZBC, 1)
|
||||
: : : : legacy);
|
||||
|
||||
/* Handle the unaligned head. */
|
||||
offset = (unsigned long)p & OFFSET_MASK;
|
||||
if (offset && len) {
|
||||
head_len = min(STEP - offset, len);
|
||||
crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
|
||||
p += head_len;
|
||||
len -= head_len;
|
||||
}
|
||||
|
||||
tail_len = len & OFFSET_MASK;
|
||||
len = len >> STEP_ORDER;
|
||||
p_ul = (unsigned long const *)p;
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
s = crc32_le_prep(crc, p_ul);
|
||||
crc = crc32_le_zbc(s, poly, poly_qt);
|
||||
p_ul++;
|
||||
}
|
||||
|
||||
/* Handle the tail bytes. */
|
||||
p = (unsigned char const *)p_ul;
|
||||
if (tail_len)
|
||||
crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
|
||||
|
||||
return crc;
|
||||
|
||||
legacy:
|
||||
return crc_fb(crc, p, len);
|
||||
}
|
||||
|
||||
u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
|
||||
crc32_le_base);
|
||||
}
|
||||
EXPORT_SYMBOL(crc32_le_arch);
|
||||
|
||||
u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
|
||||
CRC32C_POLY_QT_LE, crc32c_le_base);
|
||||
}
|
||||
EXPORT_SYMBOL(crc32c_le_arch);
|
||||
|
||||
static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
|
||||
size_t len)
|
||||
{
|
||||
size_t bits = len * 8;
|
||||
unsigned long s = 0;
|
||||
u32 crc_low = 0;
|
||||
|
||||
s = 0;
|
||||
for (int i = 0; i < len; i++)
|
||||
s = *p++ | (s << 8);
|
||||
|
||||
if (__riscv_xlen == 32 || len < sizeof(u32)) {
|
||||
s ^= crc >> (32 - bits);
|
||||
crc_low = crc << bits;
|
||||
} else {
|
||||
s ^= (unsigned long)crc << (bits - 32);
|
||||
}
|
||||
|
||||
crc = crc32_be_zbc(s);
|
||||
crc ^= crc_low;
|
||||
|
||||
return crc;
|
||||
}
|
||||
|
||||
u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
size_t offset, head_len, tail_len;
|
||||
unsigned long const *p_ul;
|
||||
unsigned long s;
|
||||
|
||||
asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
|
||||
RISCV_ISA_EXT_ZBC, 1)
|
||||
: : : : legacy);
|
||||
|
||||
/* Handle the unaligned head. */
|
||||
offset = (unsigned long)p & OFFSET_MASK;
|
||||
if (offset && len) {
|
||||
head_len = min(STEP - offset, len);
|
||||
crc = crc32_be_unaligned(crc, p, head_len);
|
||||
p += head_len;
|
||||
len -= head_len;
|
||||
}
|
||||
|
||||
tail_len = len & OFFSET_MASK;
|
||||
len = len >> STEP_ORDER;
|
||||
p_ul = (unsigned long const *)p;
|
||||
|
||||
for (int i = 0; i < len; i++) {
|
||||
s = crc32_be_prep(crc, p_ul);
|
||||
crc = crc32_be_zbc(s);
|
||||
p_ul++;
|
||||
}
|
||||
|
||||
/* Handle the tail bytes. */
|
||||
p = (unsigned char const *)p_ul;
|
||||
if (tail_len)
|
||||
crc = crc32_be_unaligned(crc, p, tail_len);
|
||||
|
||||
return crc;
|
||||
|
||||
legacy:
|
||||
return crc32_be_base(crc, p, len);
|
||||
}
|
||||
EXPORT_SYMBOL(crc32_be_arch);
|
||||
|
||||
u32 crc32_optimizations(void)
|
||||
{
|
||||
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
|
||||
return CRC32_LE_OPTIMIZATION |
|
||||
CRC32_BE_OPTIMIZATION |
|
||||
CRC32C_OPTIMIZATION;
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(crc32_optimizations);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
|
||||
|
|
@@ -0,0 +1,53 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RISC-V optimized CRC32 functions
 *
 * Copyright 2025 Google LLC
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <linux/crc32.h>
#include <linux/module.h>

#include "crc-clmul.h"

u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return crc32_lsb_clmul(crc, p, len,
				       &crc32_lsb_0xedb88320_consts);
	return crc32_le_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_le_arch);

u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return crc32_msb_clmul(crc, p, len,
				       &crc32_msb_0x04c11db7_consts);
	return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);

u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return crc32_lsb_clmul(crc, p, len,
				       &crc32_lsb_0x82f63b78_consts);
	return crc32c_base(crc, p, len);
}
EXPORT_SYMBOL(crc32c_arch);

u32 crc32_optimizations(void)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return CRC32_LE_OPTIMIZATION |
		       CRC32_BE_OPTIMIZATION |
		       CRC32C_OPTIMIZATION;
	return 0;
}
EXPORT_SYMBOL(crc32_optimizations);

MODULE_DESCRIPTION("RISC-V optimized CRC32 functions");
MODULE_LICENSE("GPL");
@ -0,0 +1,18 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* RISC-V optimized least-significant-bit-first CRC32
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*/
|
||||
|
||||
#include "crc-clmul.h"
|
||||
|
||||
typedef u32 crc_t;
|
||||
#define LSB_CRC 1
|
||||
#include "crc-clmul-template.h"
|
||||
|
||||
u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
return crc_clmul(crc, p, len, consts);
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* RISC-V optimized most-significant-bit-first CRC32
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*/
|
||||
|
||||
#include "crc-clmul.h"
|
||||
|
||||
typedef u32 crc_t;
|
||||
#define LSB_CRC 0
|
||||
#include "crc-clmul-template.h"
|
||||
|
||||
u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
return crc_clmul(crc, p, len, consts);
|
||||
}
|
||||
|
|
@@ -0,0 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * RISC-V optimized CRC64 functions
 *
 * Copyright 2025 Google LLC
 */

#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <linux/crc64.h>
#include <linux/module.h>

#include "crc-clmul.h"

u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return crc64_msb_clmul(crc, p, len,
				       &crc64_msb_0x42f0e1eba9ea3693_consts);
	return crc64_be_generic(crc, p, len);
}
EXPORT_SYMBOL(crc64_be_arch);

u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
{
	if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
		return crc64_lsb_clmul(crc, p, len,
				       &crc64_lsb_0x9a6c9329ac4bc9b5_consts);
	return crc64_nvme_generic(crc, p, len);
}
EXPORT_SYMBOL(crc64_nvme_arch);

MODULE_DESCRIPTION("RISC-V optimized CRC64 functions");
MODULE_LICENSE("GPL");
@ -0,0 +1,18 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* RISC-V optimized least-significant-bit-first CRC64
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*/
|
||||
|
||||
#include "crc-clmul.h"
|
||||
|
||||
typedef u64 crc_t;
|
||||
#define LSB_CRC 1
|
||||
#include "crc-clmul-template.h"
|
||||
|
||||
u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
return crc_clmul(crc, p, len, consts);
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* RISC-V optimized most-significant-bit-first CRC64
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*/
|
||||
|
||||
#include "crc-clmul.h"
|
||||
|
||||
typedef u64 crc_t;
|
||||
#define LSB_CRC 0
|
||||
#include "crc-clmul-template.h"
|
||||
|
||||
u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
|
||||
const struct crc_clmul_consts *consts)
|
||||
{
|
||||
return crc_clmul(crc, p, len, consts);
|
||||
}
|
||||
|
|
@ -815,9 +815,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y
|
|||
CONFIG_CORDIC=m
|
||||
CONFIG_CRYPTO_LIB_CURVE25519=m
|
||||
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
|
||||
CONFIG_CRC4=m
|
||||
CONFIG_CRC7=m
|
||||
CONFIG_CRC8=m
|
||||
CONFIG_RANDOM32_SELFTEST=y
|
||||
CONFIG_XZ_DEC_MICROLZMA=y
|
||||
CONFIG_DMA_CMA=y
|
||||
|
|
|
|||
|
|
@ -803,9 +803,6 @@ CONFIG_CORDIC=m
|
|||
CONFIG_PRIME_NUMBERS=m
|
||||
CONFIG_CRYPTO_LIB_CURVE25519=m
|
||||
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
|
||||
CONFIG_CRC4=m
|
||||
CONFIG_CRC7=m
|
||||
CONFIG_CRC8=m
|
||||
CONFIG_XZ_DEC_MICROLZMA=y
|
||||
CONFIG_DMA_CMA=y
|
||||
CONFIG_CMA_SIZE_MBYTES=0
|
||||
|
|
|
|||
|
|
@@ -62,7 +62,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs);

 DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
 DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
-DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base)
+DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)

 static int __init crc32_s390_init(void)
 {
@ -104,5 +104,3 @@ CONFIG_CRYPTO_LZO=y
|
|||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC16=y
|
||||
CONFIG_CRC_ITU_T=y
|
||||
CONFIG_CRC7=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
|
|
|
|||
|
|
@ -195,4 +195,3 @@ CONFIG_CRYPTO_LZO=y
|
|||
# CONFIG_CRYPTO_HW is not set
|
||||
CONFIG_CRC_CCITT=y
|
||||
CONFIG_CRC16=y
|
||||
CONFIG_LIBCRC32C=y
|
||||
|
|
|
|||
|
|
@ -266,4 +266,3 @@ CONFIG_CRYPTO_TEA=m
|
|||
CONFIG_CRYPTO_TWOFISH=m
|
||||
# CONFIG_CRYPTO_ANSI_CPRNG is not set
|
||||
CONFIG_CRC16=m
|
||||
CONFIG_LIBCRC32C=m
|
||||
|
|
|
|||
|
|
@ -94,4 +94,3 @@ CONFIG_CRYPTO_SERPENT=m
|
|||
CONFIG_CRYPTO_TWOFISH=m
|
||||
# CONFIG_CRYPTO_ANSI_CPRNG is not set
|
||||
# CONFIG_CRYPTO_HW is not set
|
||||
CONFIG_LIBCRC32C=m
|
||||
|
|
|
|||
|
|
@ -230,7 +230,6 @@ CONFIG_CRYPTO_TEA=m
|
|||
CONFIG_CRYPTO_TWOFISH=m
|
||||
# CONFIG_CRYPTO_ANSI_CPRNG is not set
|
||||
CONFIG_CRC16=m
|
||||
CONFIG_LIBCRC32C=m
|
||||
CONFIG_VCC=m
|
||||
CONFIG_PATA_CMD64X=y
|
||||
CONFIG_IP_PNP=y
|
||||
|
|
|
|||
|
|
@ -27,17 +27,17 @@ EXPORT_SYMBOL(crc32_le_arch);
|
|||
|
||||
void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len);
|
||||
|
||||
u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
|
||||
u32 crc32c_arch(u32 crc, const u8 *data, size_t len)
|
||||
{
|
||||
size_t n = -(uintptr_t)data & 7;
|
||||
|
||||
if (!static_branch_likely(&have_crc32c_opcode))
|
||||
return crc32c_le_base(crc, data, len);
|
||||
return crc32c_base(crc, data, len);
|
||||
|
||||
if (n) {
|
||||
/* Data isn't 8-byte aligned. Align it. */
|
||||
n = min(n, len);
|
||||
crc = crc32c_le_base(crc, data, n);
|
||||
crc = crc32c_base(crc, data, n);
|
||||
data += n;
|
||||
len -= n;
|
||||
}
|
||||
|
|
@ -48,10 +48,10 @@ u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
|
|||
len -= n;
|
||||
}
|
||||
if (len)
|
||||
crc = crc32c_le_base(crc, data, len);
|
||||
crc = crc32c_base(crc, data, len);
|
||||
return crc;
|
||||
}
|
||||
EXPORT_SYMBOL(crc32c_le_arch);
|
||||
EXPORT_SYMBOL(crc32c_arch);
|
||||
|
||||
u32 crc32_be_arch(u32 crc, const u8 *data, size_t len)
|
||||
{
|
||||
|
|
|
|||
|
|
@@ -77,7 +77,8 @@ config X86
 	select ARCH_HAS_CPU_FINALIZE_INIT
 	select ARCH_HAS_CPU_PASID if IOMMU_SVA
 	select ARCH_HAS_CRC32
-	select ARCH_HAS_CRC_T10DIF if X86_64
+	select ARCH_HAS_CRC64 if X86_64
+	select ARCH_HAS_CRC_T10DIF
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
@ -1536,26 +1536,6 @@ DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
|
|||
AES_GCM_KEY_AVX10_SIZE, 800);
|
||||
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
|
||||
|
||||
/*
|
||||
* This is a list of CPU models that are known to suffer from downclocking when
|
||||
* zmm registers (512-bit vectors) are used. On these CPUs, the AES mode
|
||||
* implementations with zmm registers won't be used by default. Implementations
|
||||
* with ymm registers (256-bit vectors) will be used by default instead.
|
||||
*/
|
||||
static const struct x86_cpu_id zmm_exclusion_list[] = {
|
||||
X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
|
||||
X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
|
||||
X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
|
||||
/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
|
||||
/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
|
||||
{},
|
||||
};
|
||||
|
||||
static int __init register_avx_algs(void)
|
||||
{
|
||||
int err;
|
||||
|
|
@ -1600,7 +1580,7 @@ static int __init register_avx_algs(void)
|
|||
if (err)
|
||||
return err;
|
||||
|
||||
if (x86_match_cpu(zmm_exclusion_list)) {
|
||||
if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
|
||||
int i;
|
||||
|
||||
aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
|
||||
|
|
|
|||
|
|
@@ -480,6 +480,7 @@
#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */

/*
 * BUG word(s)

@ -512,6 +512,25 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
|
|||
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a list of Intel CPUs that are known to suffer from downclocking when
|
||||
* ZMM registers (512-bit vectors) are used. On these CPUs, when the kernel
|
||||
* executes SIMD-optimized code such as cryptography functions or CRCs, it
|
||||
* should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
|
||||
*/
|
||||
static const struct x86_cpu_id zmm_exclusion_list[] = {
|
||||
X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
|
||||
X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
|
||||
X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
|
||||
X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
|
||||
/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
|
||||
{},
|
||||
};
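
/*
 * Illustrative note (not part of this change): code that can run with either
 * 256-bit or 512-bit vectors is expected to consult the new feature bit when
 * picking a width, roughly:
 *
 *	if (boot_cpu_has(X86_FEATURE_PREFER_YMM))
 *		...use the ymm (256-bit) implementation...
 *	else
 *		...the zmm (512-bit) implementation is fine...
 *
 * The CRC glue code later in this series does exactly this in
 * INIT_CRC_PCLMUL() when choosing between its avx2 and avx512 functions.
 */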
|
||||
|
||||
static void init_intel(struct cpuinfo_x86 *c)
|
||||
{
|
||||
early_init_intel(c);
|
||||
|
|
@ -590,6 +609,9 @@ static void init_intel(struct cpuinfo_x86 *c)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (x86_match_cpu(zmm_exclusion_list))
|
||||
set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
|
||||
|
||||
/* Work around errata */
|
||||
srat_detect_node(c);
|
||||
|
||||
|
|
|
|||
|
|
@@ -42,8 +42,11 @@ obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
crc32-x86-y := crc32-glue.o crc32-pclmul.o
crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o

obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o
crc64-x86-y := crc64-glue.o crc64-pclmul.o

obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o
crc-t10dif-x86-y := crc-t10dif-glue.o crct10dif-pcl-asm_64.o
crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o

obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o

@ -0,0 +1,195 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* CRC constants generated by:
|
||||
*
|
||||
* ./scripts/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
|
||||
*
|
||||
* Do not edit manually.
|
||||
*/
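
/*
 * Editor's sketch (an assumption, not produced by the script above and not
 * used by the kernel): each "x^k mod G" value below can be reproduced with a
 * straightforward carryless modular reduction.  For the msb-first CRC-16
 * case, G = x^16 + ... with its low 16 coefficients packed into a u16
 * (0x8bb7 for CRC-T10DIF); the table then stores the result shifted into the
 * top of a u64, i.e. multiplied by x^48.  The helper name is arbitrary and
 * u16 is assumed to come from <linux/types.h>:
 */
static inline u16 crc16_msb_x_pow_k_mod_g(unsigned int k, u16 poly)
{
	u16 r = 0x0001;				/* r = x^0 */

	while (k--) {
		u16 carry = r & 0x8000;		/* coefficient of x^15 */

		r <<= 1;			/* multiply by x */
		if (carry)
			r ^= poly;		/* x^16 == G - x^16 == poly (mod G) */
	}
	return r;				/* x^k mod G */
}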
|
||||
|
||||
/*
|
||||
* CRC folding constants generated for most-significant-bit-first CRC-16 using
|
||||
* G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
|
||||
*/
|
||||
static const struct {
|
||||
u8 bswap_mask[16];
|
||||
u64 fold_across_2048_bits_consts[2];
|
||||
u64 fold_across_1024_bits_consts[2];
|
||||
u64 fold_across_512_bits_consts[2];
|
||||
u64 fold_across_256_bits_consts[2];
|
||||
u64 fold_across_128_bits_consts[2];
|
||||
u8 shuf_table[48];
|
||||
u64 barrett_reduction_consts[2];
|
||||
} crc16_msb_0x8bb7_consts ____cacheline_aligned __maybe_unused = {
|
||||
.bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
|
||||
.fold_across_2048_bits_consts = {
|
||||
0xdccf000000000000, /* LO64_TERMS: (x^2000 mod G) * x^48 */
|
||||
0x4b0b000000000000, /* HI64_TERMS: (x^2064 mod G) * x^48 */
|
||||
},
|
||||
.fold_across_1024_bits_consts = {
|
||||
0x9d9d000000000000, /* LO64_TERMS: (x^976 mod G) * x^48 */
|
||||
0x7cf5000000000000, /* HI64_TERMS: (x^1040 mod G) * x^48 */
|
||||
},
|
||||
.fold_across_512_bits_consts = {
|
||||
0x044c000000000000, /* LO64_TERMS: (x^464 mod G) * x^48 */
|
||||
0xe658000000000000, /* HI64_TERMS: (x^528 mod G) * x^48 */
|
||||
},
|
||||
.fold_across_256_bits_consts = {
|
||||
0x6ee3000000000000, /* LO64_TERMS: (x^208 mod G) * x^48 */
|
||||
0xe7b5000000000000, /* HI64_TERMS: (x^272 mod G) * x^48 */
|
||||
},
|
||||
.fold_across_128_bits_consts = {
|
||||
0x2d56000000000000, /* LO64_TERMS: (x^80 mod G) * x^48 */
|
||||
0x06df000000000000, /* HI64_TERMS: (x^144 mod G) * x^48 */
|
||||
},
|
||||
.shuf_table = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
.barrett_reduction_consts = {
|
||||
0x8bb7000000000000, /* LO64_TERMS: (G - x^16) * x^48 */
|
||||
0xf65a57f81d33a48a, /* HI64_TERMS: (floor(x^79 / G) * x) - x^64 */
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* CRC folding constants generated for least-significant-bit-first CRC-32 using
|
||||
* G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
|
||||
* x^5 + x^4 + x^2 + x^1 + x^0
|
||||
*/
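
/*
 * (Aside: 0xedb88320 is the lsb-first, bit-reversed form of the familiar
 * CRC-32 generator 0x04c11db7; the 32-bit PCLMUL assembly file deleted later
 * in this diff documents the same polynomial as 0x04c11db7(BE)/0xEDB88320(LE).)
 */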
|
||||
static const struct {
|
||||
u64 fold_across_2048_bits_consts[2];
|
||||
u64 fold_across_1024_bits_consts[2];
|
||||
u64 fold_across_512_bits_consts[2];
|
||||
u64 fold_across_256_bits_consts[2];
|
||||
u64 fold_across_128_bits_consts[2];
|
||||
u8 shuf_table[48];
|
||||
u64 barrett_reduction_consts[2];
|
||||
} crc32_lsb_0xedb88320_consts ____cacheline_aligned __maybe_unused = {
|
||||
.fold_across_2048_bits_consts = {
|
||||
0x00000000ce3371cb, /* HI64_TERMS: (x^2079 mod G) * x^32 */
|
||||
0x00000000e95c1271, /* LO64_TERMS: (x^2015 mod G) * x^32 */
|
||||
},
|
||||
.fold_across_1024_bits_consts = {
|
||||
0x0000000033fff533, /* HI64_TERMS: (x^1055 mod G) * x^32 */
|
||||
0x00000000910eeec1, /* LO64_TERMS: (x^991 mod G) * x^32 */
|
||||
},
|
||||
.fold_across_512_bits_consts = {
|
||||
0x000000008f352d95, /* HI64_TERMS: (x^543 mod G) * x^32 */
|
||||
0x000000001d9513d7, /* LO64_TERMS: (x^479 mod G) * x^32 */
|
||||
},
|
||||
.fold_across_256_bits_consts = {
|
||||
0x00000000f1da05aa, /* HI64_TERMS: (x^287 mod G) * x^32 */
|
||||
0x0000000081256527, /* LO64_TERMS: (x^223 mod G) * x^32 */
|
||||
},
|
||||
.fold_across_128_bits_consts = {
|
||||
0x00000000ae689191, /* HI64_TERMS: (x^159 mod G) * x^32 */
|
||||
0x00000000ccaa009e, /* LO64_TERMS: (x^95 mod G) * x^32 */
|
||||
},
|
||||
.shuf_table = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
.barrett_reduction_consts = {
|
||||
0xb4e5b025f7011641, /* HI64_TERMS: floor(x^95 / G) */
|
||||
0x00000001db710640, /* LO64_TERMS: (G - x^32) * x^31 */
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* CRC folding constants generated for most-significant-bit-first CRC-64 using
|
||||
* G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
|
||||
* x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
|
||||
* x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
|
||||
* x^7 + x^4 + x^1 + x^0
|
||||
*/
|
||||
static const struct {
|
||||
u8 bswap_mask[16];
|
||||
u64 fold_across_2048_bits_consts[2];
|
||||
u64 fold_across_1024_bits_consts[2];
|
||||
u64 fold_across_512_bits_consts[2];
|
||||
u64 fold_across_256_bits_consts[2];
|
||||
u64 fold_across_128_bits_consts[2];
|
||||
u8 shuf_table[48];
|
||||
u64 barrett_reduction_consts[2];
|
||||
} crc64_msb_0x42f0e1eba9ea3693_consts ____cacheline_aligned __maybe_unused = {
|
||||
.bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
|
||||
.fold_across_2048_bits_consts = {
|
||||
0x7f52691a60ddc70d, /* LO64_TERMS: (x^2048 mod G) * x^0 */
|
||||
0x7036b0389f6a0c82, /* HI64_TERMS: (x^2112 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_1024_bits_consts = {
|
||||
0x05cf79dea9ac37d6, /* LO64_TERMS: (x^1024 mod G) * x^0 */
|
||||
0x001067e571d7d5c2, /* HI64_TERMS: (x^1088 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_512_bits_consts = {
|
||||
0x5f6843ca540df020, /* LO64_TERMS: (x^512 mod G) * x^0 */
|
||||
0xddf4b6981205b83f, /* HI64_TERMS: (x^576 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_256_bits_consts = {
|
||||
0x571bee0a227ef92b, /* LO64_TERMS: (x^256 mod G) * x^0 */
|
||||
0x44bef2a201b5200c, /* HI64_TERMS: (x^320 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_128_bits_consts = {
|
||||
0x05f5c3c7eb52fab6, /* LO64_TERMS: (x^128 mod G) * x^0 */
|
||||
0x4eb938a7d257740e, /* HI64_TERMS: (x^192 mod G) * x^0 */
|
||||
},
|
||||
.shuf_table = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
.barrett_reduction_consts = {
|
||||
0x42f0e1eba9ea3693, /* LO64_TERMS: (G - x^64) * x^0 */
|
||||
0x578d29d06cc4f872, /* HI64_TERMS: (floor(x^127 / G) * x) - x^64 */
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* CRC folding constants generated for least-significant-bit-first CRC-64 using
|
||||
* G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
|
||||
* x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
|
||||
* x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
|
||||
* x^4 + x^3 + x^0
|
||||
*/
|
||||
static const struct {
|
||||
u64 fold_across_2048_bits_consts[2];
|
||||
u64 fold_across_1024_bits_consts[2];
|
||||
u64 fold_across_512_bits_consts[2];
|
||||
u64 fold_across_256_bits_consts[2];
|
||||
u64 fold_across_128_bits_consts[2];
|
||||
u8 shuf_table[48];
|
||||
u64 barrett_reduction_consts[2];
|
||||
} crc64_lsb_0x9a6c9329ac4bc9b5_consts ____cacheline_aligned __maybe_unused = {
|
||||
.fold_across_2048_bits_consts = {
|
||||
0x37ccd3e14069cabc, /* HI64_TERMS: (x^2111 mod G) * x^0 */
|
||||
0xa043808c0f782663, /* LO64_TERMS: (x^2047 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_1024_bits_consts = {
|
||||
0xa1ca681e733f9c40, /* HI64_TERMS: (x^1087 mod G) * x^0 */
|
||||
0x5f852fb61e8d92dc, /* LO64_TERMS: (x^1023 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_512_bits_consts = {
|
||||
0x0c32cdb31e18a84a, /* HI64_TERMS: (x^575 mod G) * x^0 */
|
||||
0x62242240ace5045a, /* LO64_TERMS: (x^511 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_256_bits_consts = {
|
||||
0xb0bc2e589204f500, /* HI64_TERMS: (x^319 mod G) * x^0 */
|
||||
0xe1e0bb9d45d7a44c, /* LO64_TERMS: (x^255 mod G) * x^0 */
|
||||
},
|
||||
.fold_across_128_bits_consts = {
|
||||
0xeadc41fd2ba3d420, /* HI64_TERMS: (x^191 mod G) * x^0 */
|
||||
0x21e9761e252621ac, /* LO64_TERMS: (x^127 mod G) * x^0 */
|
||||
},
|
||||
.shuf_table = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
},
|
||||
.barrett_reduction_consts = {
|
||||
0x27ecfa329aef9f77, /* HI64_TERMS: floor(x^127 / G) */
|
||||
0x34d926535897936a, /* LO64_TERMS: (G - x^64 - x^0) / x */
|
||||
},
|
||||
};
|
||||
|
|
@ -0,0 +1,582 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
//
|
||||
// Template to generate [V]PCLMULQDQ-based CRC functions for x86
|
||||
//
|
||||
// Copyright 2025 Google LLC
|
||||
//
|
||||
// Author: Eric Biggers <ebiggers@google.com>
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/objtool.h>
|
||||
|
||||
// Offsets within the generated constants table
|
||||
.set OFFSETOF_BSWAP_MASK, -5*16 // msb-first CRCs only
|
||||
.set OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS, -4*16 // must precede next
|
||||
.set OFFSETOF_FOLD_ACROSS_1024_BITS_CONSTS, -3*16 // must precede next
|
||||
.set OFFSETOF_FOLD_ACROSS_512_BITS_CONSTS, -2*16 // must precede next
|
||||
.set OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS, -1*16 // must precede next
|
||||
.set OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS, 0*16 // must be 0
|
||||
.set OFFSETOF_SHUF_TABLE, 1*16
|
||||
.set OFFSETOF_BARRETT_REDUCTION_CONSTS, 4*16
|
||||
|
||||
// Emit a VEX (or EVEX) coded instruction if allowed, or emulate it using the
|
||||
// corresponding non-VEX instruction plus any needed moves. The supported
|
||||
// instruction formats are:
|
||||
//
|
||||
// - Two-arg [src, dst], where the non-VEX format is the same.
|
||||
// - Three-arg [src1, src2, dst] where the non-VEX format is
|
||||
// [src1, src2_and_dst]. If src2 != dst, then src1 must != dst too.
|
||||
//
|
||||
// \insn gives the instruction without a "v" prefix and including any immediate
|
||||
// argument if needed to make the instruction follow one of the above formats.
|
||||
// If \unaligned_mem_tmp is given, then the emitted non-VEX code moves \arg1 to
|
||||
// it first; this is needed when \arg1 is an unaligned mem operand.
|
||||
.macro _cond_vex insn:req, arg1:req, arg2:req, arg3, unaligned_mem_tmp
|
||||
.if AVX_LEVEL == 0
|
||||
// VEX not allowed. Emulate it.
|
||||
.ifnb \arg3 // Three-arg [src1, src2, dst]
|
||||
.ifc "\arg2", "\arg3" // src2 == dst?
|
||||
.ifnb \unaligned_mem_tmp
|
||||
movdqu \arg1, \unaligned_mem_tmp
|
||||
\insn \unaligned_mem_tmp, \arg3
|
||||
.else
|
||||
\insn \arg1, \arg3
|
||||
.endif
|
||||
.else // src2 != dst
|
||||
.ifc "\arg1", "\arg3"
|
||||
.error "Can't have src1 == dst when src2 != dst"
|
||||
.endif
|
||||
.ifnb \unaligned_mem_tmp
|
||||
movdqu \arg1, \unaligned_mem_tmp
|
||||
movdqa \arg2, \arg3
|
||||
\insn \unaligned_mem_tmp, \arg3
|
||||
.else
|
||||
movdqa \arg2, \arg3
|
||||
\insn \arg1, \arg3
|
||||
.endif
|
||||
.endif
|
||||
.else // Two-arg [src, dst]
|
||||
.ifnb \unaligned_mem_tmp
|
||||
movdqu \arg1, \unaligned_mem_tmp
|
||||
\insn \unaligned_mem_tmp, \arg2
|
||||
.else
|
||||
\insn \arg1, \arg2
|
||||
.endif
|
||||
.endif
|
||||
.else
|
||||
// VEX is allowed. Emit the desired instruction directly.
|
||||
.ifnb \arg3
|
||||
v\insn \arg1, \arg2, \arg3
|
||||
.else
|
||||
v\insn \arg1, \arg2
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// Broadcast an aligned 128-bit mem operand to all 128-bit lanes of a vector
|
||||
// register of length VL.
|
||||
.macro _vbroadcast src, dst
|
||||
.if VL == 16
|
||||
_cond_vex movdqa, \src, \dst
|
||||
.elseif VL == 32
|
||||
vbroadcasti128 \src, \dst
|
||||
.else
|
||||
vbroadcasti32x4 \src, \dst
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// Load \vl bytes from the unaligned mem operand \src into \dst, and if the CRC
|
||||
// is msb-first use \bswap_mask to reflect the bytes within each 128-bit lane.
|
||||
.macro _load_data vl, src, bswap_mask, dst
|
||||
.if \vl < 64
|
||||
_cond_vex movdqu, "\src", \dst
|
||||
.else
|
||||
vmovdqu8 \src, \dst
|
||||
.endif
|
||||
.if !LSB_CRC
|
||||
_cond_vex pshufb, \bswap_mask, \dst, \dst
|
||||
.endif
|
||||
.endm
|
||||
|
||||
.macro _prepare_v0 vl, v0, v1, bswap_mask
|
||||
.if LSB_CRC
|
||||
.if \vl < 64
|
||||
_cond_vex pxor, (BUF), \v0, \v0, unaligned_mem_tmp=\v1
|
||||
.else
|
||||
vpxorq (BUF), \v0, \v0
|
||||
.endif
|
||||
.else
|
||||
_load_data \vl, (BUF), \bswap_mask, \v1
|
||||
.if \vl < 64
|
||||
_cond_vex pxor, \v1, \v0, \v0
|
||||
.else
|
||||
vpxorq \v1, \v0, \v0
|
||||
.endif
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// The x^0..x^63 terms, i.e. poly128 mod x^64, i.e. the physically low qword for
|
||||
// msb-first order or the physically high qword for lsb-first order
|
||||
#define LO64_TERMS 0
|
||||
|
||||
// The x^64..x^127 terms, i.e. floor(poly128 / x^64), i.e. the physically high
|
||||
// qword for msb-first order or the physically low qword for lsb-first order
|
||||
#define HI64_TERMS 1
|
||||
|
||||
// Multiply the given \src1_terms of each 128-bit lane of \src1 by the given
|
||||
// \src2_terms of each 128-bit lane of \src2, and write the result(s) to \dst.
|
||||
.macro _pclmulqdq src1, src1_terms, src2, src2_terms, dst
|
||||
_cond_vex "pclmulqdq $((\src1_terms ^ LSB_CRC) << 4) ^ (\src2_terms ^ LSB_CRC),", \
|
||||
\src1, \src2, \dst
|
||||
.endm
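
// Worked example (editor's note): the first _pclmulqdq in _fold_vec below,
// "_pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp", computes the
// immediate ((1 ^ LSB_CRC) << 4) ^ (1 ^ LSB_CRC), i.e. 0x11 for a msb-first
// CRC and 0x00 for an lsb-first CRC.  Either way it multiplies the
// x^64..x^127 terms of both operands, which sit in the physically high
// qwords for msb-first order and in the physically low qwords for lsb-first
// order.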
|
||||
|
||||
// Fold \acc into \data and store the result back into \acc. \data can be an
|
||||
// unaligned mem operand if using VEX is allowed and the CRC is lsb-first so no
|
||||
// byte-reflection is needed; otherwise it must be a vector register. \consts
|
||||
// is a vector register containing the needed fold constants, and \tmp is a
|
||||
// temporary vector register. All arguments must be the same length.
|
||||
.macro _fold_vec acc, data, consts, tmp
|
||||
_pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp
|
||||
_pclmulqdq \consts, LO64_TERMS, \acc, LO64_TERMS, \acc
|
||||
.if AVX_LEVEL <= 2
|
||||
_cond_vex pxor, \data, \tmp, \tmp
|
||||
_cond_vex pxor, \tmp, \acc, \acc
|
||||
.else
|
||||
vpternlogq $0x96, \data, \tmp, \acc
|
||||
.endif
|
||||
.endm
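
// Editor's note: written out, the macro above computes, per 128-bit lane,
//
//	acc' = (acc.HI64_TERMS * consts.HI64_TERMS) xor
//	       (acc.LO64_TERMS * consts.LO64_TERMS) xor data
//
// with carryless multiplications.  The constants are (x^k mod G) values from
// crc-pclmul-consts.h, chosen so that, modulo G, this advances the
// accumulator forward over the data it is being folded into.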
|
||||
|
||||
// Fold \acc into \data and store the result back into \acc. \data is an
|
||||
// unaligned mem operand, \consts is a vector register containing the needed
|
||||
// fold constants, \bswap_mask is a vector register containing the
|
||||
// byte-reflection table if the CRC is msb-first, and \tmp1 and \tmp2 are
|
||||
// temporary vector registers. All arguments must have length \vl.
|
||||
.macro _fold_vec_mem vl, acc, data, consts, bswap_mask, tmp1, tmp2
|
||||
.if AVX_LEVEL == 0 || !LSB_CRC
|
||||
_load_data \vl, \data, \bswap_mask, \tmp1
|
||||
_fold_vec \acc, \tmp1, \consts, \tmp2
|
||||
.else
|
||||
_fold_vec \acc, \data, \consts, \tmp1
|
||||
.endif
|
||||
.endm
|
||||
|
||||
// Load the constants for folding across 2**i vectors of length VL at a time
|
||||
// into all 128-bit lanes of the vector register CONSTS.
|
||||
.macro _load_vec_folding_consts i
|
||||
_vbroadcast OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS+(4-LOG2_VL-\i)*16(CONSTS_PTR), \
|
||||
CONSTS
|
||||
.endm
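
// Editor's note, sanity-checking the indexing above: with VL == 64
// (LOG2_VL == 6) and \i == 2, the offset is (4 - 6 - 2)*16 = -64, i.e.
// OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS, matching a fold distance of
// 2**2 * 512 = 2048 bits.  With VL == 16 and \i == 0 it degenerates to
// OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS (offset 0).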
|
||||
|
||||
// Given vector registers \v0 and \v1 of length \vl, fold \v0 into \v1 and store
|
||||
// the result back into \v0. If the remaining length mod \vl is nonzero, also
|
||||
// fold \vl data bytes from BUF. For both operations the fold distance is \vl.
|
||||
// \consts must be a register of length \vl containing the fold constants.
|
||||
.macro _fold_vec_final vl, v0, v1, consts, bswap_mask, tmp1, tmp2
|
||||
_fold_vec \v0, \v1, \consts, \tmp1
|
||||
test $\vl, LEN8
|
||||
jz .Lfold_vec_final_done\@
|
||||
_fold_vec_mem \vl, \v0, (BUF), \consts, \bswap_mask, \tmp1, \tmp2
|
||||
add $\vl, BUF
|
||||
.Lfold_vec_final_done\@:
|
||||
.endm
|
||||
|
||||
// This macro generates the body of a CRC function with the following prototype:
|
||||
//
|
||||
// crc_t crc_func(crc_t crc, const u8 *buf, size_t len, const void *consts);
|
||||
//
|
||||
// |crc| is the initial CRC, and crc_t is a data type wide enough to hold it.
|
||||
// |buf| is the data to checksum. |len| is the data length in bytes, which must
|
||||
// be at least 16. |consts| is a pointer to the fold_across_128_bits_consts
|
||||
// field of the constants struct that was generated for the chosen CRC variant.
|
||||
//
|
||||
// Moving onto the macro parameters, \n is the number of bits in the CRC, e.g.
|
||||
// 32 for a CRC-32. Currently the supported values are 8, 16, 32, and 64. If
|
||||
// the file is compiled in i386 mode, then the maximum supported value is 32.
|
||||
//
|
||||
// \lsb_crc is 1 if the CRC processes the least significant bit of each byte
|
||||
// first, i.e. maps bit0 to x^7, bit1 to x^6, ..., bit7 to x^0. \lsb_crc is 0
|
||||
// if the CRC processes the most significant bit of each byte first, i.e. maps
|
||||
// bit0 to x^0, bit1 to x^1, ..., bit7 to x^7.
|
||||
//
|
||||
// \vl is the maximum length of vector register to use in bytes: 16, 32, or 64.
|
||||
//
|
||||
// \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or
|
||||
// 512 for AVX512.
|
||||
//
|
||||
// If \vl == 16 && \avx_level == 0, the generated code requires:
|
||||
// PCLMULQDQ && SSE4.1. (Note: all known CPUs with PCLMULQDQ also have SSE4.1.)
|
||||
//
|
||||
// If \vl == 32 && \avx_level == 2, the generated code requires:
|
||||
// VPCLMULQDQ && AVX2.
|
||||
//
|
||||
// If \vl == 64 && \avx_level == 512, the generated code requires:
|
||||
// VPCLMULQDQ && AVX512BW && AVX512VL.
|
||||
//
|
||||
// Other \vl and \avx_level combinations are either not supported or not useful.
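//
// For example, the DEFINE_CRC_PCLMUL_FUNCS() invocations at the bottom of the
// per-variant .S files below expand this macro as
// "_crc_pclmul n=32, lsb_crc=1, vl=16, avx_level=0" for crc32_lsb_pclmul_sse,
// and with vl=32/avx_level=2 and vl=64/avx_level=512 for the vpclmul variants.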
|
||||
.macro _crc_pclmul n, lsb_crc, vl, avx_level
|
||||
.set LSB_CRC, \lsb_crc
|
||||
.set VL, \vl
|
||||
.set AVX_LEVEL, \avx_level
|
||||
|
||||
// Define aliases for the xmm, ymm, or zmm registers according to VL.
|
||||
.irp i, 0,1,2,3,4,5,6,7
|
||||
.if VL == 16
|
||||
.set V\i, %xmm\i
|
||||
.set LOG2_VL, 4
|
||||
.elseif VL == 32
|
||||
.set V\i, %ymm\i
|
||||
.set LOG2_VL, 5
|
||||
.elseif VL == 64
|
||||
.set V\i, %zmm\i
|
||||
.set LOG2_VL, 6
|
||||
.else
|
||||
.error "Unsupported vector length"
|
||||
.endif
|
||||
.endr
|
||||
// Define aliases for the function parameters.
|
||||
// Note: when crc_t is shorter than u32, zero-extension to 32 bits is
|
||||
// guaranteed by the ABI. Zero-extension to 64 bits is *not* guaranteed
|
||||
// when crc_t is shorter than u64.
|
||||
#ifdef __x86_64__
|
||||
.if \n <= 32
|
||||
.set CRC, %edi
|
||||
.else
|
||||
.set CRC, %rdi
|
||||
.endif
|
||||
.set BUF, %rsi
|
||||
.set LEN, %rdx
|
||||
.set LEN32, %edx
|
||||
.set LEN8, %dl
|
||||
.set CONSTS_PTR, %rcx
|
||||
#else
|
||||
// 32-bit support, assuming -mregparm=3 and not including support for
|
||||
// CRC-64 (which would use both eax and edx to pass the crc parameter).
|
||||
.set CRC, %eax
|
||||
.set BUF, %edx
|
||||
.set LEN, %ecx
|
||||
.set LEN32, %ecx
|
||||
.set LEN8, %cl
|
||||
.set CONSTS_PTR, %ebx // Passed on stack
|
||||
#endif
|
||||
|
||||
// Define aliases for some local variables. V0-V5 are used without
|
||||
// aliases (for accumulators, data, temporary values, etc). Staying
|
||||
// within the first 8 vector registers keeps the code 32-bit SSE
|
||||
// compatible and reduces the size of 64-bit SSE code slightly.
|
||||
.set BSWAP_MASK, V6
|
||||
.set BSWAP_MASK_YMM, %ymm6
|
||||
.set BSWAP_MASK_XMM, %xmm6
|
||||
.set CONSTS, V7
|
||||
.set CONSTS_YMM, %ymm7
|
||||
.set CONSTS_XMM, %xmm7
|
||||
|
||||
// Use ANNOTATE_NOENDBR to suppress an objtool warning, since the
|
||||
// functions generated by this macro are called only by static_call.
|
||||
ANNOTATE_NOENDBR
|
||||
|
||||
#ifdef __i386__
|
||||
push CONSTS_PTR
|
||||
mov 8(%esp), CONSTS_PTR
|
||||
#endif
|
||||
|
||||
// Create a 128-bit vector that contains the initial CRC in the end
|
||||
// representing the high-order polynomial coefficients, and the rest 0.
|
||||
// If the CRC is msb-first, also load the byte-reflection table.
|
||||
.if \n <= 32
|
||||
_cond_vex movd, CRC, %xmm0
|
||||
.else
|
||||
_cond_vex movq, CRC, %xmm0
|
||||
.endif
|
||||
.if !LSB_CRC
|
||||
_cond_vex pslldq, $(128-\n)/8, %xmm0, %xmm0
|
||||
_vbroadcast OFFSETOF_BSWAP_MASK(CONSTS_PTR), BSWAP_MASK
|
||||
.endif
|
||||
|
||||
// Load the first vector of data and XOR the initial CRC into the
|
||||
// appropriate end of the first 128-bit lane of data. If LEN < VL, then
|
||||
// use a short vector and jump ahead to the final reduction. (LEN >= 16
|
||||
// is guaranteed here but not necessarily LEN >= VL.)
|
||||
.if VL >= 32
|
||||
cmp $VL, LEN
|
||||
jae .Lat_least_1vec\@
|
||||
.if VL == 64
|
||||
cmp $32, LEN32
|
||||
jb .Lless_than_32bytes\@
|
||||
_prepare_v0 32, %ymm0, %ymm1, BSWAP_MASK_YMM
|
||||
add $32, BUF
|
||||
jmp .Lreduce_256bits_to_128bits\@
|
||||
.Lless_than_32bytes\@:
|
||||
.endif
|
||||
_prepare_v0 16, %xmm0, %xmm1, BSWAP_MASK_XMM
|
||||
add $16, BUF
|
||||
vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
|
||||
jmp .Lcheck_for_partial_block\@
|
||||
.Lat_least_1vec\@:
|
||||
.endif
|
||||
_prepare_v0 VL, V0, V1, BSWAP_MASK
|
||||
|
||||
// Handle VL <= LEN < 4*VL.
|
||||
cmp $4*VL-1, LEN
|
||||
ja .Lat_least_4vecs\@
|
||||
add $VL, BUF
|
||||
// If VL <= LEN < 2*VL, then jump ahead to the reduction from 1 vector.
|
||||
// If VL==16 then load fold_across_128_bits_consts first, as the final
|
||||
// reduction depends on it and it won't be loaded anywhere else.
|
||||
cmp $2*VL-1, LEN32
|
||||
.if VL == 16
|
||||
_cond_vex movdqa, OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
|
||||
.endif
|
||||
jbe .Lreduce_1vec_to_128bits\@
|
||||
// Otherwise 2*VL <= LEN < 4*VL. Load one more vector and jump ahead to
|
||||
// the reduction from 2 vectors.
|
||||
_load_data VL, (BUF), BSWAP_MASK, V1
|
||||
add $VL, BUF
|
||||
jmp .Lreduce_2vecs_to_1\@
|
||||
|
||||
.Lat_least_4vecs\@:
|
||||
// Load 3 more vectors of data.
|
||||
_load_data VL, 1*VL(BUF), BSWAP_MASK, V1
|
||||
_load_data VL, 2*VL(BUF), BSWAP_MASK, V2
|
||||
_load_data VL, 3*VL(BUF), BSWAP_MASK, V3
|
||||
sub $-4*VL, BUF // Shorter than 'add 4*VL' when VL=32
|
||||
add $-4*VL, LEN // Shorter than 'sub 4*VL' when VL=32
|
||||
|
||||
// Main loop: while LEN >= 4*VL, fold the 4 vectors V0-V3 into the next
|
||||
// 4 vectors of data and write the result back to V0-V3.
|
||||
cmp $4*VL-1, LEN // Shorter than 'cmp 4*VL' when VL=32
|
||||
jbe .Lreduce_4vecs_to_2\@
|
||||
_load_vec_folding_consts 2
|
||||
.Lfold_4vecs_loop\@:
|
||||
_fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
|
||||
_fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
|
||||
_fold_vec_mem VL, V2, 2*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
|
||||
_fold_vec_mem VL, V3, 3*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
|
||||
sub $-4*VL, BUF
|
||||
add $-4*VL, LEN
|
||||
cmp $4*VL-1, LEN
|
||||
ja .Lfold_4vecs_loop\@
|
||||
|
||||
// Fold V0,V1 into V2,V3 and write the result back to V0,V1. Then fold
|
||||
// two more vectors of data from BUF, if at least that much remains.
|
||||
.Lreduce_4vecs_to_2\@:
|
||||
_load_vec_folding_consts 1
|
||||
_fold_vec V0, V2, CONSTS, V4
|
||||
_fold_vec V1, V3, CONSTS, V4
|
||||
test $2*VL, LEN8
|
||||
jz .Lreduce_2vecs_to_1\@
|
||||
_fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
|
||||
_fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
|
||||
sub $-2*VL, BUF
|
||||
|
||||
// Fold V0 into V1 and write the result back to V0. Then fold one more
|
||||
// vector of data from BUF, if at least that much remains.
|
||||
.Lreduce_2vecs_to_1\@:
|
||||
_load_vec_folding_consts 0
|
||||
_fold_vec_final VL, V0, V1, CONSTS, BSWAP_MASK, V4, V5
|
||||
|
||||
.Lreduce_1vec_to_128bits\@:
|
||||
.if VL == 64
|
||||
// Reduce 512-bit %zmm0 to 256-bit %ymm0. Then fold 256 more bits of
|
||||
// data from BUF, if at least that much remains.
|
||||
vbroadcasti128 OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS(CONSTS_PTR), CONSTS_YMM
|
||||
vextracti64x4 $1, %zmm0, %ymm1
|
||||
_fold_vec_final 32, %ymm0, %ymm1, CONSTS_YMM, BSWAP_MASK_YMM, %ymm4, %ymm5
|
||||
.Lreduce_256bits_to_128bits\@:
|
||||
.endif
|
||||
.if VL >= 32
|
||||
// Reduce 256-bit %ymm0 to 128-bit %xmm0. Then fold 128 more bits of
|
||||
// data from BUF, if at least that much remains.
|
||||
vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
|
||||
vextracti128 $1, %ymm0, %xmm1
|
||||
_fold_vec_final 16, %xmm0, %xmm1, CONSTS_XMM, BSWAP_MASK_XMM, %xmm4, %xmm5
|
||||
.Lcheck_for_partial_block\@:
|
||||
.endif
|
||||
and $15, LEN32
|
||||
jz .Lreduce_128bits_to_crc\@
|
||||
|
||||
// 1 <= LEN <= 15 data bytes remain in BUF. The polynomial is now
|
||||
// A*(x^(8*LEN)) + B, where A is the 128-bit polynomial stored in %xmm0
|
||||
// and B is the polynomial of the remaining LEN data bytes. To reduce
|
||||
// this to 128 bits without needing fold constants for each possible
|
||||
// LEN, rearrange this expression into C1*(x^128) + C2, where
|
||||
// C1 = floor(A / x^(128 - 8*LEN)) and C2 = A*x^(8*LEN) + B mod x^128.
|
||||
// Then fold C1 into C2, which is just another fold across 128 bits.
|
||||
|
||||
.if !LSB_CRC || AVX_LEVEL == 0
|
||||
// Load the last 16 data bytes. Note that originally LEN was >= 16.
|
||||
_load_data 16, "-16(BUF,LEN)", BSWAP_MASK_XMM, %xmm2
|
||||
.endif // Else will use vpblendvb mem operand later.
|
||||
.if !LSB_CRC
|
||||
neg LEN // Needed for indexing shuf_table
|
||||
.endif
|
||||
|
||||
// tmp = A*x^(8*LEN) mod x^128
|
||||
// lsb: pshufb by [LEN, LEN+1, ..., 15, -1, -1, ..., -1]
|
||||
// i.e. right-shift by LEN bytes.
|
||||
// msb: pshufb by [-1, -1, ..., -1, 0, 1, ..., 15-LEN]
|
||||
// i.e. left-shift by LEN bytes.
|
||||
_cond_vex movdqu, "OFFSETOF_SHUF_TABLE+16(CONSTS_PTR,LEN)", %xmm3
|
||||
_cond_vex pshufb, %xmm3, %xmm0, %xmm1
|
||||
|
||||
// C1 = floor(A / x^(128 - 8*LEN))
|
||||
// lsb: pshufb by [-1, -1, ..., -1, 0, 1, ..., LEN-1]
|
||||
// i.e. left-shift by 16-LEN bytes.
|
||||
// msb: pshufb by [16-LEN, 16-LEN+1, ..., 15, -1, -1, ..., -1]
|
||||
// i.e. right-shift by 16-LEN bytes.
|
||||
_cond_vex pshufb, "OFFSETOF_SHUF_TABLE+32*!LSB_CRC(CONSTS_PTR,LEN)", \
|
||||
%xmm0, %xmm0, unaligned_mem_tmp=%xmm4
|
||||
|
||||
// C2 = tmp + B. This is just a blend of tmp with the last 16 data
|
||||
// bytes (reflected if msb-first). The blend mask is the shuffle table
|
||||
// that was used to create tmp. 0 selects tmp, and 1 last16databytes.
|
||||
.if AVX_LEVEL == 0
|
||||
movdqa %xmm0, %xmm4
|
||||
movdqa %xmm3, %xmm0
|
||||
pblendvb %xmm2, %xmm1 // uses %xmm0 as implicit operand
|
||||
movdqa %xmm4, %xmm0
|
||||
.elseif LSB_CRC
|
||||
vpblendvb %xmm3, -16(BUF,LEN), %xmm1, %xmm1
|
||||
.else
|
||||
vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
|
||||
.endif
|
||||
|
||||
// Fold C1 into C2 and store the 128-bit result in %xmm0.
|
||||
_fold_vec %xmm0, %xmm1, CONSTS_XMM, %xmm4
|
||||
|
||||
.Lreduce_128bits_to_crc\@:
|
||||
// Compute the CRC as %xmm0 * x^n mod G. Here %xmm0 means the 128-bit
|
||||
// polynomial stored in %xmm0 (using either lsb-first or msb-first bit
|
||||
// order according to LSB_CRC), and G is the CRC's generator polynomial.
|
||||
|
||||
// First, multiply %xmm0 by x^n and reduce the result to 64+n bits:
|
||||
//
|
||||
// t0 := (x^(64+n) mod G) * floor(%xmm0 / x^64) +
|
||||
// x^n * (%xmm0 mod x^64)
|
||||
//
|
||||
// Store t0 * x^(64-n) in %xmm0. I.e., actually do:
|
||||
//
|
||||
// %xmm0 := ((x^(64+n) mod G) * x^(64-n)) * floor(%xmm0 / x^64) +
|
||||
// x^64 * (%xmm0 mod x^64)
|
||||
//
|
||||
// The extra unreduced factor of x^(64-n) makes floor(t0 / x^n) aligned
|
||||
// to the HI64_TERMS of %xmm0 so that the next pclmulqdq can easily
|
||||
// select it. The 64-bit constant (x^(64+n) mod G) * x^(64-n) in the
|
||||
// msb-first case, or (x^(63+n) mod G) * x^(64-n) in the lsb-first case
|
||||
// (considering the extra factor of x that gets implicitly introduced by
|
||||
// each pclmulqdq when using lsb-first order), is identical to the
|
||||
// constant that was used earlier for folding the LO64_TERMS across 128
|
||||
// bits. Thus it's already available in LO64_TERMS of CONSTS_XMM.
|
||||
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm0, HI64_TERMS, %xmm1
|
||||
.if LSB_CRC
|
||||
_cond_vex psrldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64)
|
||||
.else
|
||||
_cond_vex pslldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64)
|
||||
.endif
|
||||
_cond_vex pxor, %xmm1, %xmm0, %xmm0
|
||||
// The HI64_TERMS of %xmm0 now contain floor(t0 / x^n).
|
||||
// The LO64_TERMS of %xmm0 now contain (t0 mod x^n) * x^(64-n).
|
||||
|
||||
// First step of Barrett reduction: Compute floor(t0 / G). This is the
|
||||
// polynomial by which G needs to be multiplied to cancel out the x^n
|
||||
// and higher terms of t0, i.e. to reduce t0 mod G. First do:
|
||||
//
|
||||
// t1 := floor(x^(63+n) / G) * x * floor(t0 / x^n)
|
||||
//
|
||||
// Then the desired value floor(t0 / G) is floor(t1 / x^64). The 63 in
|
||||
// x^(63+n) is the maximum degree of floor(t0 / x^n) and thus the lowest
|
||||
// value that makes enough precision be carried through the calculation.
|
||||
//
|
||||
// The '* x' makes it so the result is floor(t1 / x^64) rather than
|
||||
// floor(t1 / x^63), making it qword-aligned in HI64_TERMS so that it
|
||||
// can be extracted much more easily in the next step. In the lsb-first
|
||||
// case the '* x' happens implicitly. In the msb-first case it must be
|
||||
// done explicitly; floor(x^(63+n) / G) * x is a 65-bit constant, so the
|
||||
// constant passed to pclmulqdq is (floor(x^(63+n) / G) * x) - x^64, and
|
||||
// the multiplication by the x^64 term is handled using a pxor. The
|
||||
// pxor causes the low 64 terms of t1 to be wrong, but they are unused.
|
||||
_cond_vex movdqa, OFFSETOF_BARRETT_REDUCTION_CONSTS(CONSTS_PTR), CONSTS_XMM
|
||||
_pclmulqdq CONSTS_XMM, HI64_TERMS, %xmm0, HI64_TERMS, %xmm1
|
||||
.if !LSB_CRC
|
||||
_cond_vex pxor, %xmm0, %xmm1, %xmm1 // += x^64 * floor(t0 / x^n)
|
||||
.endif
|
||||
// The HI64_TERMS of %xmm1 now contain floor(t1 / x^64) = floor(t0 / G).
|
||||
|
||||
// Second step of Barrett reduction: Cancel out the x^n and higher terms
|
||||
// of t0 by subtracting the needed multiple of G. This gives the CRC:
|
||||
//
|
||||
// crc := t0 - (G * floor(t0 / G))
|
||||
//
|
||||
// But %xmm0 contains t0 * x^(64-n), so it's more convenient to do:
|
||||
//
|
||||
// crc := ((t0 * x^(64-n)) - ((G * x^(64-n)) * floor(t0 / G))) / x^(64-n)
|
||||
//
|
||||
// Furthermore, since the resulting CRC is n-bit, if mod x^n is
|
||||
// explicitly applied to it then the x^n term of G makes no difference
|
||||
// in the result and can be omitted. This helps keep the constant
|
||||
// multiplier in 64 bits in most cases. This gives the following:
|
||||
//
|
||||
// %xmm0 := %xmm0 - (((G - x^n) * x^(64-n)) * floor(t0 / G))
|
||||
// crc := (%xmm0 / x^(64-n)) mod x^n
|
||||
//
|
||||
// In the lsb-first case, each pclmulqdq implicitly introduces
|
||||
// an extra factor of x, so in that case the constant that needs to be
|
||||
// passed to pclmulqdq is actually '(G - x^n) * x^(63-n)' when n <= 63.
|
||||
// For lsb-first CRCs where n=64, the extra factor of x cannot be as
|
||||
// easily avoided. In that case, instead pass '(G - x^n - x^0) / x' to
|
||||
// pclmulqdq and handle the x^0 term (i.e. 1) separately. (All CRC
|
||||
// polynomials have nonzero x^n and x^0 terms.) It works out as: the
|
||||
// CRC has to be XORed with the physically low qword of %xmm1, representing
|
||||
// floor(t0 / G). The most efficient way to do that is to move it to
|
||||
// the physically high qword and use a ternlog to combine the two XORs.
|
||||
.if LSB_CRC && \n == 64
|
||||
_cond_vex punpcklqdq, %xmm1, %xmm2, %xmm2
|
||||
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
|
||||
.if AVX_LEVEL <= 2
|
||||
_cond_vex pxor, %xmm2, %xmm0, %xmm0
|
||||
_cond_vex pxor, %xmm1, %xmm0, %xmm0
|
||||
.else
|
||||
vpternlogq $0x96, %xmm2, %xmm1, %xmm0
|
||||
.endif
|
||||
_cond_vex "pextrq $1,", %xmm0, %rax // (%xmm0 / x^0) mod x^64
|
||||
.else
|
||||
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
|
||||
_cond_vex pxor, %xmm1, %xmm0, %xmm0
|
||||
.if \n == 8
|
||||
_cond_vex "pextrb $7 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^56) mod x^8
|
||||
.elseif \n == 16
|
||||
_cond_vex "pextrw $3 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^48) mod x^16
|
||||
.elseif \n == 32
|
||||
_cond_vex "pextrd $1 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^32) mod x^32
|
||||
.else // \n == 64 && !LSB_CRC
|
||||
_cond_vex movq, %xmm0, %rax // (%xmm0 / x^0) mod x^64
|
||||
.endif
|
||||
.endif
|
||||
|
||||
.if VL > 16
|
||||
vzeroupper // Needed when ymm or zmm registers may have been used.
|
||||
.endif
|
||||
#ifdef __i386__
|
||||
pop CONSTS_PTR
|
||||
#endif
|
||||
RET
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_AS_VPCLMULQDQ
|
||||
#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
|
||||
SYM_FUNC_START(prefix##_pclmul_sse); \
|
||||
_crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \
|
||||
SYM_FUNC_END(prefix##_pclmul_sse); \
|
||||
\
|
||||
SYM_FUNC_START(prefix##_vpclmul_avx2); \
|
||||
_crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=2; \
|
||||
SYM_FUNC_END(prefix##_vpclmul_avx2); \
|
||||
\
|
||||
SYM_FUNC_START(prefix##_vpclmul_avx512); \
|
||||
_crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=512; \
|
||||
SYM_FUNC_END(prefix##_vpclmul_avx512);
|
||||
#else
|
||||
#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
|
||||
SYM_FUNC_START(prefix##_pclmul_sse); \
|
||||
_crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \
|
||||
SYM_FUNC_END(prefix##_pclmul_sse);
|
||||
#endif // !CONFIG_AS_VPCLMULQDQ
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
/*
|
||||
* Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
|
||||
* instantiated by crc-pclmul-template.S
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*
|
||||
* Author: Eric Biggers <ebiggers@google.com>
|
||||
*/
|
||||
#ifndef _CRC_PCLMUL_TEMPLATE_H
|
||||
#define _CRC_PCLMUL_TEMPLATE_H
|
||||
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <linux/static_call.h>
|
||||
#include "crc-pclmul-consts.h"
|
||||
|
||||
#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \
|
||||
crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \
|
||||
const void *consts_ptr); \
|
||||
crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \
|
||||
const void *consts_ptr); \
|
||||
crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \
|
||||
const void *consts_ptr); \
|
||||
DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
|
||||
|
||||
#define INIT_CRC_PCLMUL(prefix) \
|
||||
do { \
|
||||
if (IS_ENABLED(CONFIG_AS_VPCLMULQDQ) && \
|
||||
boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && \
|
||||
boot_cpu_has(X86_FEATURE_AVX2) && \
|
||||
cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \
|
||||
if (boot_cpu_has(X86_FEATURE_AVX512BW) && \
|
||||
boot_cpu_has(X86_FEATURE_AVX512VL) && \
|
||||
!boot_cpu_has(X86_FEATURE_PREFER_YMM) && \
|
||||
cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \
|
||||
static_call_update(prefix##_pclmul, \
|
||||
prefix##_vpclmul_avx512); \
|
||||
} else { \
|
||||
static_call_update(prefix##_pclmul, \
|
||||
prefix##_vpclmul_avx2); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
|
||||
* bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
|
||||
*
|
||||
* 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
|
||||
* There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
|
||||
* varying by CPU and factors such as which parts of the "FPU" state userspace
|
||||
* has touched, which could result in a larger cutoff being better. Indeed, a
|
||||
* larger cutoff is usually better for a *single* message. However, the
|
||||
* overhead of the FPU section gets amortized if multiple FPU sections get
|
||||
* executed before returning to userspace, since the XSAVE and XRSTOR occur only
|
||||
* once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on
|
||||
* the dcache than the table-based code is, a 16-byte cutoff seems to work well.
|
||||
*/
|
||||
#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \
|
||||
do { \
|
||||
if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \
|
||||
crypto_simd_usable()) { \
|
||||
const void *consts_ptr; \
|
||||
\
|
||||
consts_ptr = (consts).fold_across_128_bits_consts; \
|
||||
kernel_fpu_begin(); \
|
||||
crc = static_call(prefix##_pclmul)((crc), (p), (len), \
|
||||
consts_ptr); \
|
||||
kernel_fpu_end(); \
|
||||
return crc; \
|
||||
} \
|
||||
} while (0)
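
/*
 * Editor's sketch of the intended composition (it mirrors the glue files
 * later in this diff, e.g. crc32_le_arch()):
 *
 *	DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);	// at file scope
 *
 *	u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 *	{
 *		CRC_PCLMUL(crc, p, len, crc32_lsb,
 *			   crc32_lsb_0xedb88320_consts, have_pclmulqdq);
 *		return crc32_le_base(crc, p, len);	// generic fallback
 *	}
 *
 *	// and in the arch initcall, once PCLMULQDQ support is confirmed:
 *	INIT_CRC_PCLMUL(crc32_lsb);
 */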
|
||||
|
||||
#endif /* _CRC_PCLMUL_TEMPLATE_H */
|
||||
|
|
@ -1,37 +1,32 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* CRC-T10DIF using PCLMULQDQ instructions
|
||||
* CRC-T10DIF using [V]PCLMULQDQ instructions
|
||||
*
|
||||
* Copyright 2024 Google LLC
|
||||
*/
|
||||
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <linux/crc-t10dif.h>
|
||||
#include <linux/module.h>
|
||||
#include "crc-pclmul-template.h"
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
|
||||
|
||||
asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
|
||||
DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16);
|
||||
|
||||
u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
|
||||
{
|
||||
if (len >= 16 &&
|
||||
static_key_enabled(&have_pclmulqdq) && crypto_simd_usable()) {
|
||||
kernel_fpu_begin();
|
||||
crc = crc_t10dif_pcl(crc, p, len);
|
||||
kernel_fpu_end();
|
||||
return crc;
|
||||
}
|
||||
CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts,
|
||||
have_pclmulqdq);
|
||||
return crc_t10dif_generic(crc, p, len);
|
||||
}
|
||||
EXPORT_SYMBOL(crc_t10dif_arch);
|
||||
|
||||
static int __init crc_t10dif_x86_init(void)
|
||||
{
|
||||
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
|
||||
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
|
||||
static_branch_enable(&have_pclmulqdq);
|
||||
INIT_CRC_PCLMUL(crc16_msb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(crc_t10dif_x86_init);
|
||||
|
|
@ -41,11 +36,5 @@ static void __exit crc_t10dif_x86_exit(void)
|
|||
}
|
||||
module_exit(crc_t10dif_x86_exit);
|
||||
|
||||
bool crc_t10dif_is_optimized(void)
|
||||
{
|
||||
return static_key_enabled(&have_pclmulqdq);
|
||||
}
|
||||
EXPORT_SYMBOL(crc_t10dif_is_optimized);
|
||||
|
||||
MODULE_DESCRIPTION("CRC-T10DIF using PCLMULQDQ instructions");
|
||||
MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
// Copyright 2025 Google LLC

#include "crc-pclmul-template.S"

DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, /* bits= */ 16, /* lsb= */ 0)

@ -7,43 +7,20 @@
|
|||
* Copyright 2024 Google LLC
|
||||
*/
|
||||
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <linux/crc32.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
/* minimum size of buffer for crc32_pclmul_le_16 */
|
||||
#define CRC32_PCLMUL_MIN_LEN 64
|
||||
#include "crc-pclmul-template.h"
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE(have_crc32);
|
||||
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
|
||||
|
||||
u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
|
||||
DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
|
||||
|
||||
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
|
||||
static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
|
||||
size_t n = -(uintptr_t)p & 15;
|
||||
|
||||
/* align p to 16-byte boundary */
|
||||
if (n) {
|
||||
crc = crc32_le_base(crc, p, n);
|
||||
p += n;
|
||||
len -= n;
|
||||
}
|
||||
n = round_down(len, 16);
|
||||
kernel_fpu_begin();
|
||||
crc = crc32_pclmul_le_16(crc, p, n);
|
||||
kernel_fpu_end();
|
||||
p += n;
|
||||
len -= n;
|
||||
}
|
||||
if (len)
|
||||
crc = crc32_le_base(crc, p, len);
|
||||
return crc;
|
||||
CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
|
||||
have_pclmulqdq);
|
||||
return crc32_le_base(crc, p, len);
|
||||
}
|
||||
EXPORT_SYMBOL(crc32_le_arch);
|
||||
|
||||
|
|
@ -61,12 +38,12 @@ EXPORT_SYMBOL(crc32_le_arch);
|
|||
|
||||
asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
|
||||
|
||||
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
||||
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
size_t num_longs;
|
||||
|
||||
if (!static_branch_likely(&have_crc32))
|
||||
return crc32c_le_base(crc, p, len);
|
||||
return crc32c_base(crc, p, len);
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
|
||||
static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
|
||||
|
|
@ -78,14 +55,22 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
|
|||
|
||||
for (num_longs = len / sizeof(unsigned long);
|
||||
num_longs != 0; num_longs--, p += sizeof(unsigned long))
|
||||
asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
|
||||
asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
|
||||
|
||||
for (len %= sizeof(unsigned long); len; len--, p++)
|
||||
asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
|
||||
if (sizeof(unsigned long) > 4 && (len & 4)) {
|
||||
asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
|
||||
p += 4;
|
||||
}
|
||||
if (len & 2) {
|
||||
asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
|
||||
p += 2;
|
||||
}
|
||||
if (len & 1)
|
||||
asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
|
||||
|
||||
return crc;
|
||||
}
|
||||
EXPORT_SYMBOL(crc32c_le_arch);
|
||||
EXPORT_SYMBOL(crc32c_arch);
|
||||
|
||||
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
|
||||
{
|
||||
|
|
@ -97,8 +82,10 @@ static int __init crc32_x86_init(void)
|
|||
{
|
||||
if (boot_cpu_has(X86_FEATURE_XMM4_2))
|
||||
static_branch_enable(&have_crc32);
|
||||
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
|
||||
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
|
||||
static_branch_enable(&have_pclmulqdq);
|
||||
INIT_CRC_PCLMUL(crc32_lsb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(crc32_x86_init);
|
||||
|
|
|
|||
|
|
@ -1,217 +1,6 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright 2012 Xyratex Technology Limited
|
||||
*
|
||||
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
|
||||
* calculation.
|
||||
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
|
||||
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
|
||||
* at:
|
||||
* http://www.intel.com/products/processor/manuals/
|
||||
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
|
||||
* Volume 2B: Instruction Set Reference, N-Z
|
||||
*
|
||||
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
|
||||
* Alexander Boyko <Alexander_Boyko@xyratex.com>
|
||||
*/
|
||||
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
// Copyright 2025 Google LLC
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include "crc-pclmul-template.S"
|
||||
|
||||
|
||||
.section .rodata
|
||||
.align 16
|
||||
/*
|
||||
* [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
|
||||
* #define CONSTANT_R1 0x154442bd4LL
|
||||
*
|
||||
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
|
||||
* #define CONSTANT_R2 0x1c6e41596LL
|
||||
*/
|
||||
.Lconstant_R2R1:
|
||||
.octa 0x00000001c6e415960000000154442bd4
|
||||
/*
|
||||
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
|
||||
* #define CONSTANT_R3 0x1751997d0LL
|
||||
*
|
||||
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
|
||||
* #define CONSTANT_R4 0x0ccaa009eLL
|
||||
*/
|
||||
.Lconstant_R4R3:
|
||||
.octa 0x00000000ccaa009e00000001751997d0
|
||||
/*
|
||||
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
|
||||
* #define CONSTANT_R5 0x163cd6124LL
|
||||
*/
|
||||
.Lconstant_R5:
|
||||
.octa 0x00000000000000000000000163cd6124
|
||||
.Lconstant_mask32:
|
||||
.octa 0x000000000000000000000000FFFFFFFF
|
||||
/*
|
||||
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
|
||||
*
|
||||
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
|
||||
* #define CONSTANT_RU 0x1F7011641LL
|
||||
*/
|
||||
.Lconstant_RUpoly:
|
||||
.octa 0x00000001F701164100000001DB710641
|
||||
|
||||
#define CONSTANT %xmm0
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define CRC %edi
|
||||
#define BUF %rsi
|
||||
#define LEN %rdx
|
||||
#else
|
||||
#define CRC %eax
|
||||
#define BUF %edx
|
||||
#define LEN %ecx
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
.text
|
||||
/**
|
||||
* Calculate crc32
|
||||
* CRC - initial crc32
|
||||
* BUF - buffer (16 bytes aligned)
|
||||
* LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
|
||||
* return %eax crc32
|
||||
* u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
|
||||
*/
|
||||
|
||||
SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
|
||||
movdqa (BUF), %xmm1
|
||||
movdqa 0x10(BUF), %xmm2
|
||||
movdqa 0x20(BUF), %xmm3
|
||||
movdqa 0x30(BUF), %xmm4
|
||||
movd CRC, CONSTANT
|
||||
pxor CONSTANT, %xmm1
|
||||
sub $0x40, LEN
|
||||
add $0x40, BUF
|
||||
cmp $0x40, LEN
|
||||
jb .Lless_64
|
||||
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_R2R1(%rip), CONSTANT
|
||||
#else
|
||||
movdqa .Lconstant_R2R1, CONSTANT
|
||||
#endif
|
||||
|
||||
.Lloop_64:/* 64 bytes Full cache line folding */
|
||||
prefetchnta 0x40(BUF)
|
||||
movdqa %xmm1, %xmm5
|
||||
movdqa %xmm2, %xmm6
|
||||
movdqa %xmm3, %xmm7
|
||||
#ifdef __x86_64__
|
||||
movdqa %xmm4, %xmm8
|
||||
#endif
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pclmulqdq $0x00, CONSTANT, %xmm2
|
||||
pclmulqdq $0x00, CONSTANT, %xmm3
|
||||
#ifdef __x86_64__
|
||||
pclmulqdq $0x00, CONSTANT, %xmm4
|
||||
#endif
|
||||
pclmulqdq $0x11, CONSTANT, %xmm5
|
||||
pclmulqdq $0x11, CONSTANT, %xmm6
|
||||
pclmulqdq $0x11, CONSTANT, %xmm7
|
||||
#ifdef __x86_64__
|
||||
pclmulqdq $0x11, CONSTANT, %xmm8
|
||||
#endif
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm6, %xmm2
|
||||
pxor %xmm7, %xmm3
|
||||
#ifdef __x86_64__
|
||||
pxor %xmm8, %xmm4
|
||||
#else
|
||||
/* xmm8 unsupported for x32 */
|
||||
movdqa %xmm4, %xmm5
|
||||
pclmulqdq $0x00, CONSTANT, %xmm4
|
||||
pclmulqdq $0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm4
|
||||
#endif
|
||||
|
||||
pxor (BUF), %xmm1
|
||||
pxor 0x10(BUF), %xmm2
|
||||
pxor 0x20(BUF), %xmm3
|
||||
pxor 0x30(BUF), %xmm4
|
||||
|
||||
sub $0x40, LEN
|
||||
add $0x40, BUF
|
||||
cmp $0x40, LEN
|
||||
jge .Lloop_64
|
||||
.Lless_64:/* Folding cache line into 128bit */
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_R4R3(%rip), CONSTANT
|
||||
#else
|
||||
movdqa .Lconstant_R4R3, CONSTANT
|
||||
#endif
|
||||
prefetchnta (BUF)
|
||||
|
||||
movdqa %xmm1, %xmm5
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pclmulqdq $0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm2, %xmm1
|
||||
|
||||
movdqa %xmm1, %xmm5
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pclmulqdq $0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm3, %xmm1
|
||||
|
||||
movdqa %xmm1, %xmm5
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pclmulqdq $0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm4, %xmm1
|
||||
|
||||
cmp $0x10, LEN
|
||||
jb .Lfold_64
|
||||
.Lloop_16:/* Folding rest buffer into 128bit */
|
||||
movdqa %xmm1, %xmm5
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pclmulqdq $0x11, CONSTANT, %xmm5
|
||||
pxor %xmm5, %xmm1
|
||||
pxor (BUF), %xmm1
|
||||
sub $0x10, LEN
|
||||
add $0x10, BUF
|
||||
cmp $0x10, LEN
|
||||
jge .Lloop_16
|
||||
|
||||
.Lfold_64:
|
||||
/* perform the last 64 bit fold, also adds 32 zeroes
|
||||
* to the input stream */
|
||||
pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
|
||||
psrldq $0x08, %xmm1
|
||||
pxor CONSTANT, %xmm1
|
||||
|
||||
/* final 32-bit fold */
|
||||
movdqa %xmm1, %xmm2
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_R5(%rip), CONSTANT
|
||||
movdqa .Lconstant_mask32(%rip), %xmm3
|
||||
#else
|
||||
movdqa .Lconstant_R5, CONSTANT
|
||||
movdqa .Lconstant_mask32, %xmm3
|
||||
#endif
|
||||
psrldq $0x04, %xmm2
|
||||
pand %xmm3, %xmm1
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pxor %xmm2, %xmm1
|
||||
|
||||
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
|
||||
#ifdef __x86_64__
|
||||
movdqa .Lconstant_RUpoly(%rip), CONSTANT
|
||||
#else
|
||||
movdqa .Lconstant_RUpoly, CONSTANT
|
||||
#endif
|
||||
movdqa %xmm1, %xmm2
|
||||
pand %xmm3, %xmm1
|
||||
pclmulqdq $0x10, CONSTANT, %xmm1
|
||||
pand %xmm3, %xmm1
|
||||
pclmulqdq $0x00, CONSTANT, %xmm1
|
||||
pxor %xmm2, %xmm1
|
||||
pextrd $0x01, %xmm1, %eax
|
||||
|
||||
RET
|
||||
SYM_FUNC_END(crc32_pclmul_le_16)
|
||||
DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
/*
|
||||
* CRC64 using [V]PCLMULQDQ instructions
|
||||
*
|
||||
* Copyright 2025 Google LLC
|
||||
*/
|
||||
|
||||
#include <linux/crc64.h>
|
||||
#include <linux/module.h>
|
||||
#include "crc-pclmul-template.h"
|
||||
|
||||
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
|
||||
|
||||
DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64);
|
||||
DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64);
|
||||
|
||||
u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts,
|
||||
have_pclmulqdq);
|
||||
return crc64_be_generic(crc, p, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crc64_be_arch);
|
||||
|
||||
u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
|
||||
{
|
||||
CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts,
|
||||
have_pclmulqdq);
|
||||
return crc64_nvme_generic(crc, p, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(crc64_nvme_arch);
|
||||
|
||||
static int __init crc64_x86_init(void)
|
||||
{
|
||||
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
|
||||
static_branch_enable(&have_pclmulqdq);
|
||||
INIT_CRC_PCLMUL(crc64_msb);
|
||||
INIT_CRC_PCLMUL(crc64_lsb);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
arch_initcall(crc64_x86_init);
|
||||
|
||||
static void __exit crc64_x86_exit(void)
|
||||
{
|
||||
}
|
||||
module_exit(crc64_x86_exit);
|
||||
|
||||
MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
|
@@ -0,0 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
// Copyright 2025 Google LLC

#include "crc-pclmul-template.S"

DEFINE_CRC_PCLMUL_FUNCS(crc64_msb, /* bits= */ 64, /* lsb= */ 0)
DEFINE_CRC_PCLMUL_FUNCS(crc64_lsb, /* bits= */ 64, /* lsb= */ 1)

@@ -1,332 +0,0 @@
########################################################################
# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
#
# Copyright (c) 2013, Intel Corporation
#
# Authors:
#     Erdinc Ozturk <erdinc.ozturk@intel.com>
#     Vinodh Gopal <vinodh.gopal@intel.com>
#     James Guilford <james.guilford@intel.com>
#     Tim Chen <tim.c.chen@linux.intel.com>
#
# This software is available to you under a choice of one of two
# licenses.  You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
#   notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
#   notice, this list of conditions and the following disclaimer in the
#   documentation and/or other materials provided with the
#   distribution.
#
# * Neither the name of the Intel Corporation nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Reference paper titled "Fast CRC Computation for Generic
# Polynomials Using PCLMULQDQ Instruction"
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
#

#include <linux/linkage.h>

.text

#define	init_crc	%edi
#define	buf		%rsi
#define	len		%rdx

#define	FOLD_CONSTS	%xmm10
#define	BSWAP_MASK	%xmm11

# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
# reg1, reg2.
.macro	fold_32_bytes	offset, reg1, reg2
	movdqu	\offset(buf), %xmm9
	movdqu	\offset+16(buf), %xmm12
	pshufb	BSWAP_MASK, %xmm9
	pshufb	BSWAP_MASK, %xmm12
	movdqa	\reg1, %xmm8
	movdqa	\reg2, %xmm13
	pclmulqdq	$0x00, FOLD_CONSTS, \reg1
	pclmulqdq	$0x11, FOLD_CONSTS, %xmm8
	pclmulqdq	$0x00, FOLD_CONSTS, \reg2
	pclmulqdq	$0x11, FOLD_CONSTS, %xmm13
	pxor	%xmm9 , \reg1
	xorps	%xmm8 , \reg1
	pxor	%xmm12, \reg2
	xorps	%xmm13, \reg2
.endm

# Fold src_reg into dst_reg.
.macro	fold_16_bytes	src_reg, dst_reg
	movdqa	\src_reg, %xmm8
	pclmulqdq	$0x11, FOLD_CONSTS, \src_reg
	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
	pxor	%xmm8, \dst_reg
	xorps	\src_reg, \dst_reg
.endm

#
# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
#
# Assumes len >= 16.
#
SYM_FUNC_START(crc_t10dif_pcl)

	movdqa	.Lbswap_mask(%rip), BSWAP_MASK

	# For sizes less than 256 bytes, we can't fold 128 bytes at a time.
	cmp	$256, len
	jl	.Lless_than_256_bytes

	# Load the first 128 data bytes.  Byte swapping is necessary to make the
	# bit order match the polynomial coefficient order.
	movdqu	16*0(buf), %xmm0
	movdqu	16*1(buf), %xmm1
	movdqu	16*2(buf), %xmm2
	movdqu	16*3(buf), %xmm3
	movdqu	16*4(buf), %xmm4
	movdqu	16*5(buf), %xmm5
	movdqu	16*6(buf), %xmm6
	movdqu	16*7(buf), %xmm7
	add	$128, buf
	pshufb	BSWAP_MASK, %xmm0
	pshufb	BSWAP_MASK, %xmm1
	pshufb	BSWAP_MASK, %xmm2
	pshufb	BSWAP_MASK, %xmm3
	pshufb	BSWAP_MASK, %xmm4
	pshufb	BSWAP_MASK, %xmm5
	pshufb	BSWAP_MASK, %xmm6
	pshufb	BSWAP_MASK, %xmm7

	# XOR the first 16 data *bits* with the initial CRC value.
	pxor	%xmm8, %xmm8
	pinsrw	$7, init_crc, %xmm8
	pxor	%xmm8, %xmm0

	movdqa	.Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS

	# Subtract 128 for the 128 data bytes just consumed.  Subtract another
	# 128 to simplify the termination condition of the following loop.
	sub	$256, len

	# While >= 128 data bytes remain (not counting xmm0-7), fold the 128
	# bytes xmm0-7 into them, storing the result back into xmm0-7.
.Lfold_128_bytes_loop:
	fold_32_bytes	0, %xmm0, %xmm1
	fold_32_bytes	32, %xmm2, %xmm3
	fold_32_bytes	64, %xmm4, %xmm5
	fold_32_bytes	96, %xmm6, %xmm7
	add	$128, buf
	sub	$128, len
	jge	.Lfold_128_bytes_loop

	# Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.

	# Fold across 64 bytes.
	movdqa	.Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
	fold_16_bytes	%xmm0, %xmm4
	fold_16_bytes	%xmm1, %xmm5
	fold_16_bytes	%xmm2, %xmm6
	fold_16_bytes	%xmm3, %xmm7
	# Fold across 32 bytes.
	movdqa	.Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
	fold_16_bytes	%xmm4, %xmm6
	fold_16_bytes	%xmm5, %xmm7
	# Fold across 16 bytes.
	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
	fold_16_bytes	%xmm6, %xmm7

	# Add 128 to get the correct number of data bytes remaining in 0...127
	# (not counting xmm7), following the previous extra subtraction by 128.
	# Then subtract 16 to simplify the termination condition of the
	# following loop.
	add	$128-16, len

	# While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
	# xmm7 into them, storing the result back into xmm7.
	jl	.Lfold_16_bytes_loop_done
.Lfold_16_bytes_loop:
	movdqa	%xmm7, %xmm8
	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
	pxor	%xmm8, %xmm7
	movdqu	(buf), %xmm0
	pshufb	BSWAP_MASK, %xmm0
	pxor	%xmm0 , %xmm7
	add	$16, buf
	sub	$16, len
	jge	.Lfold_16_bytes_loop

.Lfold_16_bytes_loop_done:
	# Add 16 to get the correct number of data bytes remaining in 0...15
	# (not counting xmm7), following the previous extra subtraction by 16.
	add	$16, len
	je	.Lreduce_final_16_bytes

.Lhandle_partial_segment:
	# Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
	# bytes are in xmm7 and the rest are the remaining data in 'buf'. To do
	# this without needing a fold constant for each possible 'len', redivide
	# the bytes into a first chunk of 'len' bytes and a second chunk of 16
	# bytes, then fold the first chunk into the second.

	movdqa	%xmm7, %xmm2

	# xmm1 = last 16 original data bytes
	movdqu	-16(buf, len), %xmm1
	pshufb	BSWAP_MASK, %xmm1

	# xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
	lea	.Lbyteshift_table+16(%rip), %rax
	sub	len, %rax
	movdqu	(%rax), %xmm0
	pshufb	%xmm0, %xmm2

	# xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
	pxor	.Lmask1(%rip), %xmm0
	pshufb	%xmm0, %xmm7

	# xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
	# then '16-len' bytes from xmm2 (high-order bytes).
	pblendvb	%xmm2, %xmm1	#xmm0 is implicit

	# Fold the first chunk into the second chunk, storing the result in xmm7.
	movdqa	%xmm7, %xmm8
	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
	pxor	%xmm8, %xmm7
	pxor	%xmm1, %xmm7

.Lreduce_final_16_bytes:
	# Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC

	# Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
	movdqa	.Lfinal_fold_consts(%rip), FOLD_CONSTS

	# Fold the high 64 bits into the low 64 bits, while also multiplying by
	# x^64.  This produces a 128-bit value congruent to x^64 * M(x) and
	# whose low 48 bits are 0.
	movdqa	%xmm7, %xmm0
	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7	# high bits * x^48 * (x^80 mod G(x))
	pslldq	$8, %xmm0
	pxor	%xmm0, %xmm7			# + low bits * x^64

	# Fold the high 32 bits into the low 96 bits.  This produces a 96-bit
	# value congruent to x^64 * M(x) and whose low 48 bits are 0.
	movdqa	%xmm7, %xmm0
	pand	.Lmask2(%rip), %xmm0		# zero high 32 bits
	psrldq	$12, %xmm7			# extract high 32 bits
	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7	# high 32 bits * x^48 * (x^48 mod G(x))
	pxor	%xmm0, %xmm7			# + low bits

	# Load G(x) and floor(x^48 / G(x)).
	movdqa	.Lbarrett_reduction_consts(%rip), FOLD_CONSTS

	# Use Barrett reduction to compute the final CRC value.
	movdqa	%xmm7, %xmm0
	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7	# high 32 bits * floor(x^48 / G(x))
	psrlq	$32, %xmm7			# /= x^32
	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7	# *= G(x)
	psrlq	$48, %xmm0
	pxor	%xmm7, %xmm0			# + low 16 nonzero bits
	# Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.

	pextrw	$0, %xmm0, %eax
	RET

.align 16
.Lless_than_256_bytes:
	# Checksumming a buffer of length 16...255 bytes

	# Load the first 16 data bytes.
	movdqu	(buf), %xmm7
	pshufb	BSWAP_MASK, %xmm7
	add	$16, buf

	# XOR the first 16 data *bits* with the initial CRC value.
	pxor	%xmm0, %xmm0
	pinsrw	$7, init_crc, %xmm0
	pxor	%xmm0, %xmm7

	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
	cmp	$16, len
	je	.Lreduce_final_16_bytes	# len == 16
	sub	$32, len
	jge	.Lfold_16_bytes_loop	# 32 <= len <= 255
	add	$16, len
	jmp	.Lhandle_partial_segment	# 17 <= len <= 31
SYM_FUNC_END(crc_t10dif_pcl)

.section	.rodata, "a", @progbits
	.align	16

# Fold constants precomputed from the polynomial 0x18bb7
# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
.Lfold_across_128_bytes_consts:
	.quad	0x0000000000006123	# x^(8*128)	mod G(x)
	.quad	0x0000000000002295	# x^(8*128+64)	mod G(x)
.Lfold_across_64_bytes_consts:
	.quad	0x0000000000001069	# x^(4*128)	mod G(x)
	.quad	0x000000000000dd31	# x^(4*128+64)	mod G(x)
.Lfold_across_32_bytes_consts:
	.quad	0x000000000000857d	# x^(2*128)	mod G(x)
	.quad	0x0000000000007acc	# x^(2*128+64)	mod G(x)
.Lfold_across_16_bytes_consts:
	.quad	0x000000000000a010	# x^(1*128)	mod G(x)
	.quad	0x0000000000001faa	# x^(1*128+64)	mod G(x)
.Lfinal_fold_consts:
	.quad	0x1368000000000000	# x^48 * (x^48 mod G(x))
	.quad	0x2d56000000000000	# x^48 * (x^80 mod G(x))
.Lbarrett_reduction_consts:
	.quad	0x0000000000018bb7	# G(x)
	.quad	0x00000001f65a57f8	# floor(x^48 / G(x))

.section	.rodata.cst16.mask1, "aM", @progbits, 16
.align	16
.Lmask1:
	.octa	0x80808080808080808080808080808080

.section	.rodata.cst16.mask2, "aM", @progbits, 16
.align	16
.Lmask2:
	.octa	0x00000000FFFFFFFFFFFFFFFFFFFFFFFF

.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
.align	16
.Lbswap_mask:
	.octa	0x000102030405060708090A0B0C0D0E0F

.section	.rodata.cst32.byteshift_table, "aM", @progbits, 32
.align	16
# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
# 0x80} XOR the index vector to shift right by '16 - len' bytes.
.Lbyteshift_table:
	.byte	 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
	.byte	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
	.byte	 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte	 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
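The fold constants in the .rodata section above are just powers of x reduced modulo G(x) = 0x18bb7. They can be re-derived with a few lines of standalone C; the helper below is illustrative and assumes only the polynomial given in the comments above:

#include <stdint.h>
#include <stdio.h>

/* Compute x^n mod G(x) over GF(2) for the degree-16 CRC-T10DIF
 * polynomial G(x) = 0x18bb7. */
static uint32_t xpow_mod_g(unsigned int n)
{
	uint32_t r = 1;			/* r represents x^0 */

	while (n--) {
		r <<= 1;		/* multiply by x */
		if (r & 0x10000)	/* degree reached 16: reduce */
			r ^= 0x18bb7;
	}
	return r;			/* 16-bit remainder */
}

int main(void)
{
	/* These should reproduce .Lfold_across_128_bytes_consts and friends. */
	printf("x^(8*128)    mod G(x) = 0x%04x\n", xpow_mod_g(8 * 128));
	printf("x^(8*128+64) mod G(x) = 0x%04x\n", xpow_mod_g(8 * 128 + 64));
	printf("x^(1*128)    mod G(x) = 0x%04x\n", xpow_mod_g(1 * 128));
	return 0;
}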
@@ -63,7 +63,7 @@ config BLK_DEV_BSGLIB
config BLK_DEV_INTEGRITY
	bool "Block layer data integrity support"
	select CRC_T10DIF
-	select CRC64_ROCKSOFT
+	select CRC64
	help
	  Some storage devices allow extra information to be
	  stored/retrieved to help protect the data. The block layer
@@ -210,7 +210,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)

static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
{
-	return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
+	return cpu_to_be64(crc64_nvme(crc, data, len));
}

static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
@@ -1081,26 +1081,6 @@ config CRYPTO_CRC32

	  Used by RoCEv2 and f2fs.

-config CRYPTO_CRCT10DIF
-	tristate "CRCT10DIF"
-	select CRYPTO_HASH
-	select CRC_T10DIF
-	help
-	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
-
-	  CRC algorithm used by the SCSI Block Commands standard.
-
-config CRYPTO_CRC64_ROCKSOFT
-	tristate "CRC64 based on Rocksoft Model algorithm"
-	depends on CRC64
-	select CRYPTO_HASH
-	help
-	  CRC64 CRC algorithm based on the Rocksoft Model CRC Algorithm
-
-	  Used by the NVMe implementation of T10 DIF (BLK_DEV_INTEGRITY)
-
-	  See https://zlib.net/crc_v3.txt
-
endmenu

menu "Compression"
@@ -155,9 +155,6 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
CFLAGS_crc32c_generic.o += -DARCH=$(ARCH)
CFLAGS_crc32_generic.o += -DARCH=$(ARCH)
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_generic.o
-CFLAGS_crct10dif_generic.o += -DARCH=$(ARCH)
-obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
@@ -85,7 +85,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

-	ctx->crc = crc32c_le_base(ctx->crc, data, length);
+	ctx->crc = crc32c_base(ctx->crc, data, length);
	return 0;
}

@@ -94,7 +94,7 @@ static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
{
	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);

-	ctx->crc = __crc32c_le(ctx->crc, data, length);
+	ctx->crc = crc32c(ctx->crc, data, length);
	return 0;
}

@@ -108,14 +108,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out)

static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
{
-	put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out);
+	put_unaligned_le32(~crc32c_base(*crcp, data, len), out);
	return 0;
}

static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len,
			       u8 *out)
{
-	put_unaligned_le32(~__crc32c_le(*crcp, data, len), out);
+	put_unaligned_le32(~crc32c(*crcp, data, len), out);
	return 0;
}
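These shash wrappers are now thin shims over the CRC32C library functions. Outside the crypto API a caller does not need a "crc32c" shash at all; it can call crc32c() from <linux/crc32.h> directly, one-shot or incrementally. A hedged sketch (function and buffer names are illustrative):

#include <linux/crc32.h>

/* One-shot CRC32C over a buffer. */
static u32 example_crc32c(const void *buf, size_t len)
{
	return crc32c(0, buf, len);
}

/* Incremental use: feed chunks and keep the running value. */
static u32 example_crc32c_chunks(const void *a, size_t alen,
				 const void *b, size_t blen)
{
	u32 crc = 0;

	crc = crc32c(crc, a, alen);
	crc = crc32c(crc, b, blen);
	return crc;
}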
|
@ -1,89 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
#include <linux/crc64.h>
|
||||
#include <linux/module.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/unaligned.h>
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
u64 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
u64 *crc = shash_desc_ctx(desc);
|
||||
|
||||
*crc = crc64_rocksoft_generic(*crc, data, length);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
u64 *crc = shash_desc_ctx(desc);
|
||||
|
||||
put_unaligned_le64(*crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __chksum_finup(u64 crc, const u8 *data, unsigned int len, u8 *out)
|
||||
{
|
||||
crc = crc64_rocksoft_generic(crc, data, len);
|
||||
put_unaligned_le64(crc, out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_finup(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *out)
|
||||
{
|
||||
u64 *crc = shash_desc_ctx(desc);
|
||||
|
||||
return __chksum_finup(*crc, data, len, out);
|
||||
}
|
||||
|
||||
static int chksum_digest(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length, u8 *out)
|
||||
{
|
||||
return __chksum_finup(0, data, length, out);
|
||||
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = sizeof(u64),
|
||||
.init = chksum_init,
|
||||
.update = chksum_update,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup,
|
||||
.digest = chksum_digest,
|
||||
.descsize = sizeof(u64),
|
||||
.base = {
|
||||
.cra_name = CRC64_ROCKSOFT_STRING,
|
||||
.cra_driver_name = "crc64-rocksoft-generic",
|
||||
.cra_priority = 200,
|
||||
.cra_blocksize = 1,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init crc64_rocksoft_init(void)
|
||||
{
|
||||
return crypto_register_shash(&alg);
|
||||
}
|
||||
|
||||
static void __exit crc64_rocksoft_exit(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
module_init(crc64_rocksoft_init);
|
||||
module_exit(crc64_rocksoft_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Rocksoft model CRC64 calculation.");
|
||||
MODULE_ALIAS_CRYPTO("crc64-rocksoft");
|
||||
MODULE_ALIAS_CRYPTO("crc64-rocksoft-generic");
|
||||
|
|
@ -1,168 +0,0 @@
|
|||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* T10 Data Integrity Field CRC16 Crypto Transform
|
||||
*
|
||||
* Copyright (c) 2007 Oracle Corporation. All rights reserved.
|
||||
* Written by Martin K. Petersen <martin.petersen@oracle.com>
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/crc-t10dif.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
|
||||
struct chksum_desc_ctx {
|
||||
__u16 crc;
|
||||
};
|
||||
|
||||
/*
|
||||
* Steps through buffer one byte at a time, calculates reflected
|
||||
* crc using table.
|
||||
*/
|
||||
|
||||
static int chksum_init(struct shash_desc *desc)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
ctx->crc = crc_t10dif_update(ctx->crc, data, length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
*(__u16 *)out = ctx->crc;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out)
|
||||
{
|
||||
*(__u16 *)out = crc_t10dif_generic(crc, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __chksum_finup_arch(__u16 crc, const u8 *data, unsigned int len,
|
||||
u8 *out)
|
||||
{
|
||||
*(__u16 *)out = crc_t10dif_update(crc, data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int chksum_finup(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
return __chksum_finup(ctx->crc, data, len, out);
|
||||
}
|
||||
|
||||
static int chksum_finup_arch(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, u8 *out)
|
||||
{
|
||||
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
return __chksum_finup_arch(ctx->crc, data, len, out);
|
||||
}
|
||||
|
||||
static int chksum_digest(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length, u8 *out)
|
||||
{
|
||||
return __chksum_finup(0, data, length, out);
|
||||
}
|
||||
|
||||
static int chksum_digest_arch(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int length, u8 *out)
|
||||
{
|
||||
return __chksum_finup_arch(0, data, length, out);
|
||||
}
|
||||
|
||||
static struct shash_alg algs[] = {{
|
||||
.digestsize = CRC_T10DIF_DIGEST_SIZE,
|
||||
.init = chksum_init,
|
||||
.update = chksum_update,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup,
|
||||
.digest = chksum_digest,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base.cra_name = "crct10dif",
|
||||
.base.cra_driver_name = "crct10dif-generic",
|
||||
.base.cra_priority = 100,
|
||||
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
}, {
|
||||
.digestsize = CRC_T10DIF_DIGEST_SIZE,
|
||||
.init = chksum_init,
|
||||
.update = chksum_update_arch,
|
||||
.final = chksum_final,
|
||||
.finup = chksum_finup_arch,
|
||||
.digest = chksum_digest_arch,
|
||||
.descsize = sizeof(struct chksum_desc_ctx),
|
||||
.base.cra_name = "crct10dif",
|
||||
.base.cra_driver_name = "crct10dif-" __stringify(ARCH),
|
||||
.base.cra_priority = 150,
|
||||
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
|
||||
.base.cra_module = THIS_MODULE,
|
||||
}};
|
||||
|
||||
static int num_algs;
|
||||
|
||||
static int __init crct10dif_mod_init(void)
|
||||
{
|
||||
/* register the arch flavor only if it differs from the generic one */
|
||||
num_algs = 1 + crc_t10dif_is_optimized();
|
||||
|
||||
return crypto_register_shashes(algs, num_algs);
|
||||
}
|
||||
|
||||
static void __exit crct10dif_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shashes(algs, num_algs);
|
||||
}
|
||||
|
||||
subsys_initcall(crct10dif_mod_init);
|
||||
module_exit(crct10dif_mod_fini);
|
||||
|
||||
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
|
||||
MODULE_DESCRIPTION("T10 DIF CRC calculation.");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_ALIAS_CRYPTO("crct10dif");
|
||||
MODULE_ALIAS_CRYPTO("crct10dif-generic");
|
||||
|
|
@@ -1654,10 +1654,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
		ret = min(ret, tcrypt_test("ghash"));
		break;

-	case 47:
-		ret = min(ret, tcrypt_test("crct10dif"));
-		break;
-
	case 48:
		ret = min(ret, tcrypt_test("sha3-224"));
		break;

@@ -2272,10 +2268,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
		test_hash_speed("crc32c", sec, generic_hash_speed_template);
		if (mode > 300 && mode < 400) break;
		fallthrough;
-	case 320:
-		test_hash_speed("crct10dif", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-		fallthrough;
	case 321:
		test_hash_speed("poly1305", sec, poly1305_speed_template);
		if (mode > 300 && mode < 400) break;
@@ -4759,20 +4759,6 @@ static const struct alg_test_desc alg_test_descs[] = {
		.suite = {
			.hash = __VECS(crc32c_tv_template)
		}
	}, {
-		.alg = "crc64-rocksoft",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = __VECS(crc64_rocksoft_tv_template)
-		}
-	}, {
-		.alg = "crct10dif",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = __VECS(crct10dif_tv_template)
-		}
-	}, {
		.alg = "ctr(aes)",
		.test = alg_test_skcipher,
crypto/testmgr.h (303 lines changed)
@ -6017,309 +6017,6 @@ static const struct hash_testvec rmd160_tv_template[] = {
|
|||
}
|
||||
};
|
||||
|
||||
static const u8 zeroes[4096] = { [0 ... 4095] = 0 };
|
||||
static const u8 ones[4096] = { [0 ... 4095] = 0xff };
|
||||
|
||||
static const struct hash_testvec crc64_rocksoft_tv_template[] = {
|
||||
{
|
||||
.plaintext = zeroes,
|
||||
.psize = 4096,
|
||||
.digest = "\x4e\xb6\x22\xeb\x67\xd3\x82\x64",
|
||||
}, {
|
||||
.plaintext = ones,
|
||||
.psize = 4096,
|
||||
.digest = "\xac\xa3\xec\x02\x73\xba\xdd\xc0",
|
||||
}
|
||||
};
|
||||
|
||||
static const struct hash_testvec crct10dif_tv_template[] = {
|
||||
{
|
||||
.plaintext = "abc",
|
||||
.psize = 3,
|
||||
.digest = (u8 *)(u16 []){ 0x443b },
|
||||
}, {
|
||||
.plaintext = "1234567890123456789012345678901234567890"
|
||||
"123456789012345678901234567890123456789",
|
||||
.psize = 79,
|
||||
.digest = (u8 *)(u16 []){ 0x4b70 },
|
||||
}, {
|
||||
.plaintext = "abcdddddddddddddddddddddddddddddddddddddddd"
|
||||
"ddddddddddddd",
|
||||
.psize = 56,
|
||||
.digest = (u8 *)(u16 []){ 0x9ce3 },
|
||||
}, {
|
||||
.plaintext = "1234567890123456789012345678901234567890"
|
||||
"1234567890123456789012345678901234567890"
|
||||
"1234567890123456789012345678901234567890"
|
||||
"1234567890123456789012345678901234567890"
|
||||
"1234567890123456789012345678901234567890"
|
||||
"1234567890123456789012345678901234567890"
|
||||
"1234567890123456789012345678901234567890"
|
||||
"123456789012345678901234567890123456789",
|
||||
.psize = 319,
|
||||
.digest = (u8 *)(u16 []){ 0x44c6 },
|
||||
}, {
|
||||
.plaintext = "\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
|
||||
"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
|
||||
"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
|
||||
"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
|
||||
"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
|
||||
"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
|
||||
"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
|
||||
"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
|
||||
"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
|
||||
"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
|
||||
"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
|
||||
"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
|
||||
"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
|
||||
"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
|
||||
"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
|
||||
"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
|
||||
"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
|
||||
"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
|
||||
"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
|
||||
"\x58\xef\x63\xfa\x91\x05\x9c\x33"
|
||||
"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
|
||||
"\x19\xb0\x47\xde\x52\xe9\x80\x17"
|
||||
"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
|
||||
"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
|
||||
"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
|
||||
"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
|
||||
"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
|
||||
"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
|
||||
"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
|
||||
"\x86\x1d\x91\x28\xbf\x33\xca\x61"
|
||||
"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
|
||||
"\x47\xde\x75\x0c\x80\x17\xae\x22"
|
||||
"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
|
||||
"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
|
||||
"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
|
||||
"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
|
||||
"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
|
||||
"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
|
||||
"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
|
||||
"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
|
||||
"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
|
||||
"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
|
||||
"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
|
||||
"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
|
||||
"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
|
||||
"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
|
||||
"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
|
||||
"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
|
||||
"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
|
||||
"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
|
||||
"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
|
||||
"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
|
||||
"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
|
||||
"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
|
||||
"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
|
||||
"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
|
||||
"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
|
||||
"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
|
||||
"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
|
||||
"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
|
||||
"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
|
||||
"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
|
||||
"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
|
||||
"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
|
||||
"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
|
||||
"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
|
||||
"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
|
||||
"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
|
||||
"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
|
||||
"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
|
||||
"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
|
||||
"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
|
||||
"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
|
||||
"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
|
||||
"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
|
||||
"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
|
||||
"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
|
||||
"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
|
||||
"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
|
||||
"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
|
||||
"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
|
||||
"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
|
||||
"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
|
||||
"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
|
||||
"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
|
||||
"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
|
||||
"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
|
||||
"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
|
||||
"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
|
||||
"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
|
||||
"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
|
||||
"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
|
||||
"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
|
||||
"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
|
||||
"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
|
||||
"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
|
||||
"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
|
||||
"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
|
||||
"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
|
||||
"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
|
||||
"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
|
||||
"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
|
||||
"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
|
||||
"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
|
||||
"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
|
||||
"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
|
||||
"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
|
||||
"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
|
||||
"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
|
||||
"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
|
||||
"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
|
||||
"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
|
||||
"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
|
||||
"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
|
||||
"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
|
||||
"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
|
||||
"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
|
||||
"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
|
||||
"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
|
||||
"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
|
||||
"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
|
||||
"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
|
||||
"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
|
||||
"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
|
||||
"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
|
||||
"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
|
||||
"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
|
||||
"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
|
||||
"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
|
||||
"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
|
||||
"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
|
||||
"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
|
||||
"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
|
||||
"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
|
||||
"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
|
||||
"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
|
||||
"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
|
||||
"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
|
||||
"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
|
||||
"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
|
||||
"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
|
||||
"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
|
||||
"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
|
||||
"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
|
||||
"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
|
||||
"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
|
||||
"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
|
||||
"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
|
||||
"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
|
||||
"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
|
||||
"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
|
||||
"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
|
||||
"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
|
||||
"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
|
||||
"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
|
||||
"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
|
||||
"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
|
||||
"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
|
||||
"\x47\xde\x52\xe9\x80\x17\x8b\x22"
|
||||
"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
|
||||
"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
|
||||
"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
|
||||
"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
|
||||
"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
|
||||
"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
|
||||
"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
|
||||
"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
|
||||
"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
|
||||
"\x75\x0c\x80\x17\xae\x22\xb9\x50"
|
||||
"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
|
||||
"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
|
||||
"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
|
||||
"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
|
||||
"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
|
||||
"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
|
||||
"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
|
||||
"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
|
||||
"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
|
||||
"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
|
||||
"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
|
||||
"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
|
||||
"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
|
||||
"\x48\xdf\x53\xea\x81\x18\x8c\x23"
|
||||
"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
|
||||
"\x09\xa0\x37\xce\x42\xd9\x70\x07"
|
||||
"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
|
||||
"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
|
||||
"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
|
||||
"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
|
||||
"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
|
||||
"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
|
||||
"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
|
||||
"\x76\x0d\x81\x18\xaf\x23\xba\x51"
|
||||
"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
|
||||
"\x37\xce\x65\xfc\x70\x07\x9e\x12"
|
||||
"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
|
||||
"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
|
||||
"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
|
||||
"\xff\x73\x0a\xa1\x15\xac\x43\xda"
|
||||
"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
|
||||
"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
|
||||
"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
|
||||
"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
|
||||
"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
|
||||
"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
|
||||
"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
|
||||
"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
|
||||
"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
|
||||
"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
|
||||
"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
|
||||
"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
|
||||
"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
|
||||
"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
|
||||
"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
|
||||
"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
|
||||
"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
|
||||
"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
|
||||
"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
|
||||
"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
|
||||
"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
|
||||
"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
|
||||
"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
|
||||
"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
|
||||
"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
|
||||
"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
|
||||
"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
|
||||
"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
|
||||
"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
|
||||
"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
|
||||
"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
|
||||
"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
|
||||
"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
|
||||
"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
|
||||
"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
|
||||
"\xef\x86\x1d\x91\x28\xbf\x33\xca"
|
||||
"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
|
||||
"\xd3\x47\xde\x75\x0c\x80\x17\xae"
|
||||
"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
|
||||
"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
|
||||
"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
|
||||
"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
|
||||
"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
|
||||
"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
|
||||
"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
|
||||
"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
|
||||
"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
|
||||
"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
|
||||
"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
|
||||
"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
|
||||
"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
|
||||
"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
|
||||
"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
|
||||
"\x67\xfe\x95\x09\xa0\x37\xce\x42"
|
||||
"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
|
||||
"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
|
||||
"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
|
||||
.psize = 2048,
|
||||
.digest = (u8 *)(u16 []){ 0x23ca },
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* Streebog test vectors from RFC 6986 and GOST R 34.11-2012
|
||||
*/
|
||||
|
|
|
|||
|
|
@@ -162,7 +162,7 @@ static int burst_update(struct shash_desc *desc, const u8 *d8,
		if (mctx->poly == CRC32_POLY_LE)
			ctx->partial = crc32_le(ctx->partial, d8, length);
		else
-			ctx->partial = __crc32c_le(ctx->partial, d8, length);
+			ctx->partial = crc32c(ctx->partial, d8, length);

		goto pm_out;
	}
@@ -676,8 +676,8 @@ static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
				      int len)
{
-	return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
-						   (__force __u32)csum2, len);
+	return (__force __wsum)crc32c_combine((__force __u32)csum,
+					      (__force __u32)csum2, len);
}

static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
@ -714,7 +714,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
|
|||
|
||||
block = page_address(io->meta_page);
|
||||
block->meta_size = cpu_to_le32(io->meta_offset);
|
||||
crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE);
|
||||
crc = crc32c(log->uuid_checksum, block, PAGE_SIZE);
|
||||
block->checksum = cpu_to_le32(crc);
|
||||
|
||||
log->current_io = NULL;
|
||||
|
|
@ -1020,8 +1020,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
|
|||
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
|
||||
continue;
|
||||
addr = kmap_local_page(sh->dev[i].page);
|
||||
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
|
||||
addr, PAGE_SIZE);
|
||||
sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
|
||||
addr, PAGE_SIZE);
|
||||
kunmap_local(addr);
|
||||
}
|
||||
parity_pages = 1 + !!(sh->qd_idx >= 0);
|
||||
|
|
@ -1741,7 +1741,7 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
|
|||
le64_to_cpu(mb->position) != ctx->pos)
|
||||
return -EINVAL;
|
||||
|
||||
crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
|
||||
crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
|
||||
if (stored_crc != crc)
|
||||
return -EINVAL;
|
||||
|
||||
|
|
@ -1780,8 +1780,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
|
|||
return -ENOMEM;
|
||||
r5l_recovery_create_empty_meta_block(log, page, pos, seq);
|
||||
mb = page_address(page);
|
||||
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
|
||||
mb, PAGE_SIZE));
|
||||
mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, mb, PAGE_SIZE));
|
||||
if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE |
|
||||
REQ_SYNC | REQ_FUA, false)) {
|
||||
__free_page(page);
|
||||
|
|
@ -1976,7 +1975,7 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
|
|||
|
||||
r5l_recovery_read_page(log, ctx, page, log_offset);
|
||||
addr = kmap_local_page(page);
|
||||
checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
|
||||
checksum = crc32c(log->uuid_checksum, addr, PAGE_SIZE);
|
||||
kunmap_local(addr);
|
||||
return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
|
||||
}
|
||||
|
|
@ -2379,8 +2378,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
|
|||
raid5_compute_blocknr(sh, i, 0));
|
||||
addr = kmap_local_page(dev->page);
|
||||
payload->checksum[0] = cpu_to_le32(
|
||||
crc32c_le(log->uuid_checksum, addr,
|
||||
PAGE_SIZE));
|
||||
crc32c(log->uuid_checksum, addr,
|
||||
PAGE_SIZE));
|
||||
kunmap_local(addr);
|
||||
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
|
||||
dev->page, REQ_OP_WRITE, false);
|
||||
|
|
@ -2392,8 +2391,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
|
|||
}
|
||||
}
|
||||
mb->meta_size = cpu_to_le32(offset);
|
||||
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
|
||||
mb, PAGE_SIZE));
|
||||
mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum,
|
||||
mb, PAGE_SIZE));
|
||||
sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
|
||||
REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false);
|
||||
sh->log_start = ctx->pos;
|
||||
|
|
@ -2885,8 +2884,8 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
|
|||
if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
|
||||
continue;
|
||||
addr = kmap_local_page(sh->dev[i].page);
|
||||
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
|
||||
addr, PAGE_SIZE);
|
||||
sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
|
||||
addr, PAGE_SIZE);
|
||||
kunmap_local(addr);
|
||||
pages++;
|
||||
}
|
||||
|
|
@ -2969,7 +2968,7 @@ static int r5l_load_log(struct r5l_log *log)
|
|||
}
|
||||
stored_crc = le32_to_cpu(mb->checksum);
|
||||
mb->checksum = 0;
|
||||
expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
|
||||
expected_crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
|
||||
if (stored_crc != expected_crc) {
|
||||
create_super = true;
|
||||
goto create;
|
||||
|
|
@ -3077,8 +3076,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
|
|||
return -ENOMEM;
|
||||
log->rdev = rdev;
|
||||
log->need_cache_flush = bdev_write_cache(rdev->bdev);
|
||||
log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
|
||||
sizeof(rdev->mddev->uuid));
|
||||
log->uuid_checksum = crc32c(~0, rdev->mddev->uuid,
|
||||
sizeof(rdev->mddev->uuid));
|
||||
|
||||
mutex_init(&log->io_mutex);
|
||||
|
||||
|
|
|
|||
|
|
@ -346,9 +346,9 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
|
|||
if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) {
|
||||
le32_add_cpu(&e->pp_size, PAGE_SIZE);
|
||||
io->pp_size += PAGE_SIZE;
|
||||
e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum),
|
||||
page_address(sh->ppl_page),
|
||||
PAGE_SIZE));
|
||||
e->checksum = cpu_to_le32(crc32c(le32_to_cpu(e->checksum),
|
||||
page_address(sh->ppl_page),
|
||||
PAGE_SIZE));
|
||||
}
|
||||
|
||||
list_add_tail(&sh->log_list, &io->stripe_list);
|
||||
|
|
@ -454,7 +454,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
|
|||
}
|
||||
|
||||
pplhdr->entries_count = cpu_to_le32(io->entries_count);
|
||||
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
|
||||
pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PPL_HEADER_SIZE));
|
||||
|
||||
/* Rewind the buffer if current PPL is larger then remaining space */
|
||||
if (log->use_multippl &&
|
||||
|
|
@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
|
|||
goto out;
|
||||
}
|
||||
|
||||
crc = crc32c_le(crc, page_address(page), s);
|
||||
crc = crc32c(crc, page_address(page), s);
|
||||
|
||||
pp_size -= s;
|
||||
sector += s >> 9;
|
||||
|
|
@ -1052,7 +1052,7 @@ static int ppl_write_empty_header(struct ppl_log *log)
|
|||
log->rdev->ppl.size, GFP_NOIO, 0);
|
||||
memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
|
||||
pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
|
||||
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
|
||||
pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PAGE_SIZE));
|
||||
|
||||
if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
|
||||
PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
|
||||
|
|
@ -1106,7 +1106,7 @@ static int ppl_load_distributed(struct ppl_log *log)
|
|||
/* check header validity */
|
||||
crc_stored = le32_to_cpu(pplhdr->checksum);
|
||||
pplhdr->checksum = 0;
|
||||
crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
|
||||
crc = ~crc32c(~0, pplhdr, PAGE_SIZE);
|
||||
|
||||
if (crc_stored != crc) {
|
||||
pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
|
||||
|
|
@ -1390,7 +1390,7 @@ int ppl_init_log(struct r5conf *conf)
|
|||
spin_lock_init(&ppl_conf->no_mem_stripes_lock);
|
||||
|
||||
if (!mddev->external) {
|
||||
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
|
||||
ppl_conf->signature = ~crc32c(~0, mddev->uuid, sizeof(mddev->uuid));
|
||||
ppl_conf->block_size = 512;
|
||||
} else {
|
||||
ppl_conf->block_size =
|
||||
|
|
|
|||
|
|
@@ -2593,7 +2593,7 @@ void bnx2x_init_rx_mode_obj(struct bnx2x *bp,
/********************* Multicast verbs: SET, CLEAR ****************************/
static inline u8 bnx2x_mcast_bin_from_mac(u8 *mac)
{
-	return (crc32c_le(0, mac, ETH_ALEN) >> 24) & 0xff;
+	return (crc32c(0, mac, ETH_ALEN) >> 24) & 0xff;
}

struct bnx2x_mcast_mac_elem {
@@ -312,7 +312,7 @@ static void tb_cfg_print_error(struct tb_ctl *ctl, enum tb_cfg_space space,

static __be32 tb_crc(const void *data, size_t len)
{
-	return cpu_to_be32(~__crc32c_le(~0, data, len));
+	return cpu_to_be32(~crc32c(~0, data, len));
}

static void tb_ctl_pkg_free(struct ctl_pkg *pkg)

@@ -211,7 +211,7 @@ static u8 tb_crc8(u8 *data, int len)

static u32 tb_crc32(void *data, size_t len)
{
-	return ~__crc32c_le(~0, data, len);
+	return ~crc32c(~0, data, len);
}

#define TB_DROM_DATA_START	13
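The ~crc32c(~0, data, len) pattern in these hunks is the standard CRC-32C convention: seed with all-ones and invert the result, since crc32c() itself performs no inversion. A tiny self-check sketch (the function name is illustrative; 0xe3069283 is the well-known CRC-32C check value for "123456789"):

#include <linux/crc32.h>

static bool crc32c_convention_selftest(void)
{
	static const u8 check[] = "123456789";

	return ~crc32c(~0, check, sizeof(check) - 1) == 0xe3069283;
}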
|
@@ -4,9 +4,6 @@

#include <linux/types.h>

-#define CRC_T10DIF_DIGEST_SIZE 2
-#define CRC_T10DIF_BLOCK_SIZE 1
-
u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len);
u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len);

@@ -22,13 +19,4 @@ static inline u16 crc_t10dif(const u8 *p, size_t len)
	return crc_t10dif_update(0, p, len);
}

-#if IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)
-bool crc_t10dif_is_optimized(void);
-#else
-static inline bool crc_t10dif_is_optimized(void)
-{
-	return false;
-}
-#endif
-
#endif
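crc_t10dif_is_optimized() can go away because its only user was the crypto-API wrapper removed elsewhere in this diff. What remains is the plain library interface, where crc_t10dif() is simply crc_t10dif_update() with a zero seed, as the header shows. A sketch of typical use (the sector size and function name are illustrative assumptions):

#include <linux/crc-t10dif.h>

/* Guard tag over one 512-byte sector of protection-information data. */
static u16 example_guard_tag(const u8 *sector)
{
	return crc_t10dif(sector, 512);
}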
|
|
@@ -8,33 +8,32 @@
#include <linux/types.h>
#include <linux/bitrev.h>

-u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len);
+u32 crc32_le_base(u32 crc, const u8 *p, size_t len);
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len);
+u32 crc32_be_base(u32 crc, const u8 *p, size_t len);
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len);
+u32 crc32c_base(u32 crc, const u8 *p, size_t len);

-static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
+static inline u32 crc32_le(u32 crc, const void *p, size_t len)
{
	if (IS_ENABLED(CONFIG_CRC32_ARCH))
		return crc32_le_arch(crc, p, len);
	return crc32_le_base(crc, p, len);
}

-static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
+static inline u32 crc32_be(u32 crc, const void *p, size_t len)
{
	if (IS_ENABLED(CONFIG_CRC32_ARCH))
		return crc32_be_arch(crc, p, len);
	return crc32_be_base(crc, p, len);
}

-/* TODO: leading underscores should be dropped once callers have been updated */
-static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
+static inline u32 crc32c(u32 crc, const void *p, size_t len)
{
	if (IS_ENABLED(CONFIG_CRC32_ARCH))
-		return crc32c_le_arch(crc, p, len);
-	return crc32c_le_base(crc, p, len);
+		return crc32c_arch(crc, p, len);
+	return crc32c_base(crc, p, len);
}

/*

@@ -45,7 +44,7 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
 */
#define CRC32_LE_OPTIMIZATION	BIT(0) /* crc32_le() is optimized */
#define CRC32_BE_OPTIMIZATION	BIT(1) /* crc32_be() is optimized */
-#define CRC32C_OPTIMIZATION	BIT(2) /* __crc32c_le() is optimized */
+#define CRC32C_OPTIMIZATION	BIT(2) /* crc32c() is optimized */
#if IS_ENABLED(CONFIG_CRC32_ARCH)
u32 crc32_optimizations(void);
#else

@@ -70,36 +69,34 @@ static inline u32 crc32_optimizations(void) { return 0; }
 * with the same initializer as crc1, and crc2 seed was 0. See
 * also crc32_combine_test().
 */
-u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len);
+u32 crc32_le_shift(u32 crc, size_t len);

static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
{
	return crc32_le_shift(crc1, len2) ^ crc2;
}

+u32 crc32c_shift(u32 crc, size_t len);
+
/**
- * __crc32c_le_combine - Combine two crc32c check values into one. For two
- *			 sequences of bytes, seq1 and seq2 with lengths len1
- *			 and len2, __crc32c_le() check values were calculated
- *			 for each, crc1 and crc2.
+ * crc32c_combine - Combine two crc32c check values into one. For two sequences
+ *		    of bytes, seq1 and seq2 with lengths len1 and len2, crc32c()
+ *		    check values were calculated for each, crc1 and crc2.
 *
 * @crc1: crc32c of the first block
 * @crc2: crc32c of the second block
 * @len2: length of the second block
 *
- * Return: The __crc32c_le() check value of seq1 and seq2 concatenated,
- *	   requiring only crc1, crc2, and len2. Note: If seq_full denotes
- *	   the concatenated memory area of seq1 with seq2, and crc_full
- *	   the __crc32c_le() value of seq_full, then crc_full ==
- *	   __crc32c_le_combine(crc1, crc2, len2) when crc_full was
- *	   seeded with the same initializer as crc1, and crc2 seed
- *	   was 0. See also crc32c_combine_test().
+ * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring
+ *	   only crc1, crc2, and len2. Note: If seq_full denotes the concatenated
+ *	   memory area of seq1 with seq2, and crc_full the crc32c() value of
+ *	   seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when
+ *	   crc_full was seeded with the same initializer as crc1, and crc2 seed
+ *	   was 0. See also crc_combine_test().
 */
-u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len);
-
-static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2)
{
-	return __crc32c_le_shift(crc1, len2) ^ crc2;
+	return crc32c_shift(crc1, len2) ^ crc2;
}

#define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length)
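crc32c_combine() merges two independently computed CRCs without re-reading the data, as the kernel-doc above describes. A sketch of the documented invariant (buffer names are illustrative):

#include <linux/crc32.h>

/* crc_full == crc32c_combine(crc1, crc2, len2) whenever crc1 covers seq1,
 * crc2 covers seq2 seeded with 0, and crc_full covers the concatenation
 * seeded like crc1. */
static bool crc32c_combine_demo(const u8 *seq1, size_t len1,
				const u8 *seq2, size_t len2)
{
	u32 crc1 = crc32c(~0, seq1, len1);
	u32 crc2 = crc32c(0, seq2, len2);
	u32 combined = crc32c_combine(crc1, crc2, len2);
	u32 full = crc32c(crc1, seq2, len2);	/* extend crc1 over seq2 */

	return combined == full;
}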
|
|
|||
|
|
@@ -4,12 +4,4 @@

#include <linux/crc32.h>

-static inline u32 crc32c(u32 crc, const void *address, unsigned int length)
-{
-	return __crc32c_le(crc, address, length);
-}
-
-/* This macro exists for backwards-compatibility. */
-#define crc32c_le crc32c
-
#endif /* _LINUX_CRC32C_H */
|
|
|
|||