CRC updates for 6.15

Another set of improvements to the kernel's CRC (cyclic redundancy
 check) code:
 
 - Rework the CRC64 library functions to be directly optimized, like what
   I did last cycle for the CRC32 and CRC-T10DIF library functions.
 
 - Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
   support and acceleration for crc64_be and crc64_nvme.
 
 - Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
   crc_t10dif, crc64_be, and crc64_nvme.
 
 - Remove crc_t10dif and crc64_rocksoft from the crypto API, since they
   are no longer needed there.
 
 - Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect.
 
 - Add kunit test cases for crc64_nvme and crc7.
 
 - Eliminate redundant functions for calculating the Castagnoli CRC32,
   settling on just crc32c().
 
 - Remove unnecessary prompts from some of the CRC kconfig options.
 
 - Further optimize the x86 crc32c code.
 -----BEGIN PGP SIGNATURE-----
 
 iIoEABYIADIWIQSacvsUNc7UX4ntmEPzXCl4vpKOKwUCZ+CGGhQcZWJpZ2dlcnNA
 Z29vZ2xlLmNvbQAKCRDzXCl4vpKOK3wRAP4tbnzawUmlIHIF0hleoADXehUgAhMt
 NZn15mGvyiuwIQEA8W9qvnLdFXZkdxhxAEvDDFjyrRauL6eGtr/GvCx4AQY=
 =wmKG
 -----END PGP SIGNATURE-----

Merge tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux

Pull CRC updates from Eric Biggers:
 "Another set of improvements to the kernel's CRC (cyclic redundancy
  check) code:

   - Rework the CRC64 library functions to be directly optimized, like
     what I did last cycle for the CRC32 and CRC-T10DIF library
     functions

   - Rewrite the x86 PCLMULQDQ-optimized CRC code, and add VPCLMULQDQ
     support and acceleration for crc64_be and crc64_nvme

   - Rewrite the riscv Zbc-optimized CRC code, and add acceleration for
     crc_t10dif, crc64_be, and crc64_nvme

   - Remove crc_t10dif and crc64_rocksoft from the crypto API, since
     they are no longer needed there

   - Rename crc64_rocksoft to crc64_nvme, as the old name was incorrect

   - Add kunit test cases for crc64_nvme and crc7

   - Eliminate redundant functions for calculating the Castagnoli CRC32,
     settling on just crc32c()

   - Remove unnecessary prompts from some of the CRC kconfig options

   - Further optimize the x86 crc32c code"
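
For reference, the consolidated library entry points mentioned above are plain
functions that callers invoke directly, with no crypto API transform involved.
A minimal, hypothetical usage sketch (assuming the <linux/crc32.h>,
<linux/crc64.h>, and <linux/crc-t10dif.h> interfaces from this series; seed and
finalization conventions are each CRC's own and are not shown):

    #include <linux/types.h>
    #include <linux/crc32.h>
    #include <linux/crc64.h>
    #include <linux/crc-t10dif.h>

    /* Fold one buffer into each running CRC value. */
    static void crc_update_examples(const u8 *buf, size_t len, u32 *c32c,
                                    u64 *c64be, u64 *c64nvme, u16 *t10)
    {
            *c32c    = crc32c(*c32c, buf, len);         /* Castagnoli CRC32 */
            *c64be   = crc64_be(*c64be, buf, len);      /* msb-first CRC64 */
            *c64nvme = crc64_nvme(*c64nvme, buf, len);  /* was crc64_rocksoft */
            *t10     = crc_t10dif_update(*t10, buf, len);
    }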

* tag 'crc-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiggers/linux: (36 commits)
  x86/crc: drop the avx10_256 functions and rename avx10_512 to avx512
  lib/crc: remove unnecessary prompt for CONFIG_CRC64
  lib/crc: remove unnecessary prompt for CONFIG_LIBCRC32C
  lib/crc: remove unnecessary prompt for CONFIG_CRC8
  lib/crc: remove unnecessary prompt for CONFIG_CRC7
  lib/crc: remove unnecessary prompt for CONFIG_CRC4
  lib/crc7: unexport crc7_be_syndrome_table
  lib/crc_kunit.c: update comment in crc_benchmark()
  lib/crc_kunit.c: add test and benchmark for crc7_be()
  x86/crc32: optimize tail handling for crc32c short inputs
  riscv/crc64: add Zbc optimized CRC64 functions
  riscv/crc-t10dif: add Zbc optimized CRC-T10DIF function
  riscv/crc32: reimplement the CRC32 functions using new template
  riscv/crc: add "template" for Zbc optimized CRC functions
  x86/crc: add ANNOTATE_NOENDBR to suppress objtool warnings
  x86/crc32: improve crc32c_arch() code generation with clang
  x86/crc64: implement crc64_be and crc64_nvme using new template
  x86/crc-t10dif: implement crc_t10dif using new template
  x86/crc32: implement crc32_le using new template
  x86/crc: add "template" for [V]PCLMULQDQ based CRC functions
  ...
Linus Torvalds 2025-03-25 18:33:04 -07:00
commit ee6740fd34
115 changed files with 2121 additions and 1969 deletions


@ -6147,6 +6147,7 @@ F: Documentation/staging/crc*
F: arch/*/lib/crc*
F: include/linux/crc*
F: lib/crc*
F: scripts/gen-crc-consts.py
CREATIVE SB0540
M: Bastien Nocera <hadess@hadess.net>


@ -129,7 +129,6 @@ CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
CONFIG_CRC_CCITT=y
CONFIG_LIBCRC32C=y
CONFIG_PRINTK_TIME=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y


@ -113,7 +113,6 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_LIBCRC32C=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_SLAB=y
CONFIG_DEBUG_SPINLOCK=y


@ -483,8 +483,6 @@ CONFIG_CRYPTO_DEV_SAHARA=y
CONFIG_CRYPTO_DEV_MXS_DCP=y
CONFIG_CRC_CCITT=m
CONFIG_CRC_T10DIF=y
CONFIG_CRC7=m
CONFIG_LIBCRC32C=m
CONFIG_CMA_SIZE_MBYTES=64
CONFIG_FONTS=y
CONFIG_FONT_8x8=y


@ -148,7 +148,6 @@ CONFIG_EXT2_FS=y
CONFIG_JFFS2_FS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_CRC_ITU_T=y
CONFIG_CRC7=y
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_MUST_CHECK is not set
# CONFIG_DEBUG_BUGVERBOSE is not set


@ -118,7 +118,6 @@ CONFIG_TMPFS=y
CONFIG_CONFIGFS_FS=y
CONFIG_JFFS2_FS=y
CONFIG_KEYS=y
CONFIG_CRC32_BIT=y
CONFIG_DMA_API_DEBUG=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y


@ -290,7 +290,6 @@ CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
CONFIG_CRC_CCITT=y
CONFIG_LIBCRC32C=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y


@ -188,7 +188,6 @@ CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_DEV_MARVELL_CESA=y
CONFIG_CRC_CCITT=y
CONFIG_LIBCRC32C=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y


@ -161,7 +161,6 @@ CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_CRYPTO_DEV_MXS_DCP=y
CONFIG_CRC_ITU_T=m
CONFIG_CRC7=m
CONFIG_FONTS=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y


@ -221,7 +221,6 @@ CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_LIBCRC32C=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y


@ -710,8 +710,6 @@ CONFIG_CRYPTO_DEV_OMAP_DES=m
CONFIG_CRC_CCITT=y
CONFIG_CRC_T10DIF=y
CONFIG_CRC_ITU_T=y
CONFIG_CRC7=y
CONFIG_LIBCRC32C=y
CONFIG_DMA_CMA=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y


@ -235,7 +235,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRC_CCITT=y
CONFIG_LIBCRC32C=m
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y


@ -75,7 +75,6 @@ CONFIG_EXT3_FS=y
# CONFIG_INOTIFY_USER is not set
CONFIG_NLS=y
CONFIG_CRC_ITU_T=y
CONFIG_CRC7=y
CONFIG_PRINTK_TIME=y
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y


@ -193,7 +193,6 @@ CONFIG_PKCS7_MESSAGE_PARSER=y
CONFIG_SYSTEM_TRUSTED_KEYRING=y
CONFIG_CRC_CCITT=y
CONFIG_CRC_ITU_T=m
CONFIG_LIBCRC32C=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_MAGIC_SYSRQ=y


@ -69,12 +69,6 @@ static void __exit crc_t10dif_arm_exit(void)
}
module_exit(crc_t10dif_arm_exit);
bool crc_t10dif_is_optimized(void)
{
return static_key_enabled(&have_neon);
}
EXPORT_SYMBOL(crc_t10dif_is_optimized);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions");
MODULE_LICENSE("GPL v2");


@ -59,14 +59,14 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32_le_arch);
static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len)
static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len)
{
if (static_branch_likely(&have_crc32))
return crc32c_armv8_le(crc, p, len);
return crc32c_le_base(crc, p, len);
return crc32c_base(crc, p, len);
}
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
if (len >= PMULL_MIN_LEN + 15 &&
static_branch_likely(&have_pmull) && crypto_simd_usable()) {
@ -74,7 +74,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
/* align p to 16-byte boundary */
if (n) {
crc = crc32c_le_scalar(crc, p, n);
crc = crc32c_scalar(crc, p, n);
p += n;
len -= n;
}
@ -85,9 +85,9 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
p += n;
len -= n;
}
return crc32c_le_scalar(crc, p, len);
return crc32c_scalar(crc, p, len);
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{


@ -70,12 +70,6 @@ static void __exit crc_t10dif_arm64_exit(void)
}
module_exit(crc_t10dif_arm64_exit);
bool crc_t10dif_is_optimized(void)
{
return static_key_enabled(&have_asimd);
}
EXPORT_SYMBOL(crc_t10dif_is_optimized);
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions");
MODULE_LICENSE("GPL v2");


@ -22,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32_le_base(crc, p, len);
@ -43,10 +43,10 @@ u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32_le_arch);
u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32c_le_base(crc, p, len);
return crc32c_base(crc, p, len);
if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
kernel_neon_begin();
@ -62,9 +62,9 @@ u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
return crc32c_le_arm64(crc, p, len);
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32_be_base(crc, p, len);


@ -75,7 +75,6 @@ CONFIG_CRYPTO_MD5=y
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC_T10DIF=y
CONFIG_LIBCRC32C=y
CONFIG_FRAME_WARN=0
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y


@ -65,10 +65,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32_le_arch);
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
if (!static_branch_likely(&have_crc32))
return crc32c_le_base(crc, p, len);
return crc32c_base(crc, p, len);
while (len >= sizeof(u64)) {
u64 value = get_unaligned_le64(p);
@ -100,7 +100,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{


@ -69,7 +69,6 @@ CONFIG_USB_HCD_BCMA=y
CONFIG_USB_HCD_SSB=y
CONFIG_LEDS_TRIGGER_TIMER=y
CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_CRC32_SARWATE=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_INFO_REDUCED=y


@ -239,7 +239,6 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
CONFIG_CRC_T10DIF=m
CONFIG_CRC7=m
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DETECT_HUNG_TASK=y


@ -70,4 +70,3 @@ CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFSD=y
CONFIG_NFSD_V3_ACL=y
CONFIG_LIBCRC32C=y


@ -216,7 +216,6 @@ CONFIG_CRYPTO_USER=y
CONFIG_CRYPTO_CRYPTD=y
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_CRC32_SLICEBY4=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_MAGIC_SYSRQ=y


@ -180,7 +180,6 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_CRCT10DIF=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m


@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_CRCT10DIF=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m


@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_CRCT10DIF=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m


@ -219,4 +219,3 @@ CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_LZO=m
# CONFIG_CRYPTO_HW is not set
CONFIG_CRC_CCITT=y
CONFIG_CRC7=m


@ -178,7 +178,6 @@ CONFIG_CRYPTO_TEA=y
CONFIG_CRYPTO_TWOFISH=y
CONFIG_CRYPTO_DEFLATE=y
CONFIG_CRC_T10DIF=y
CONFIG_LIBCRC32C=y
CONFIG_FONTS=y
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y


@ -129,7 +129,6 @@ CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XZ=y
CONFIG_CRYPTO_ARC4=m
CONFIG_CRC_ITU_T=m
CONFIG_CRC32_SARWATE=y
# CONFIG_XZ_DEC_X86 is not set
# CONFIG_XZ_DEC_POWERPC is not set
# CONFIG_XZ_DEC_IA64 is not set


@ -141,7 +141,6 @@ CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XZ=y
CONFIG_CRYPTO_ARC4=m
CONFIG_CRC_ITU_T=m
CONFIG_CRC32_SARWATE=y
CONFIG_PRINTK_TIME=y
CONFIG_STRIP_ASM_SYMS=y
CONFIG_DEBUG_FS=y


@ -16,15 +16,6 @@
#include <asm/mipsregs.h>
#include <linux/unaligned.h>
enum crc_op_size {
b, h, w, d,
};
enum crc_type {
crc32,
crc32c,
};
#ifndef TOOLCHAIN_SUPPORTS_CRC
#define _ASM_SET_CRC(OP, SZ, TYPE) \
_ASM_MACRO_3R(OP, rt, rs, rt2, \
@ -117,10 +108,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32_le_arch);
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
if (!static_branch_likely(&have_crc32))
return crc32c_le_base(crc, p, len);
return crc32c_base(crc, p, len);
if (IS_ENABLED(CONFIG_64BIT)) {
for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
@ -158,7 +149,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
}
return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{


@ -293,7 +293,6 @@ CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_DEFLATE=m
# CONFIG_CRYPTO_HW is not set
CONFIG_CRC_CCITT=m
CONFIG_LIBCRC32C=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_KERNEL=y
CONFIG_STRIP_ASM_SYMS=y


@ -223,7 +223,6 @@ CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=y
CONFIG_CRC_CCITT=y
CONFIG_CRC_T10DIF=y
CONFIG_LIBCRC32C=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_MD5=y


@ -44,7 +44,6 @@ CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRC32_SLICEBY4=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y


@ -47,7 +47,6 @@ CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRC32_SLICEBY4=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y


@ -39,4 +39,3 @@ CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRC_CCITT=y
CONFIG_CRC32_SLICEBY4=y


@ -70,7 +70,6 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRYPTO=y
CONFIG_CRYPTO_DEV_TALITOS=y
CONFIG_CRC32_SLICEBY4=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y


@ -281,7 +281,6 @@ CONFIG_LSM="yama,loadpin,safesetid,integrity"
# CONFIG_CRYPTO_HW is not set
CONFIG_CRC16=y
CONFIG_CRC_ITU_T=y
CONFIG_LIBCRC32C=y
# CONFIG_XZ_DEC_X86 is not set
# CONFIG_XZ_DEC_IA64 is not set
# CONFIG_XZ_DEC_ARM is not set


@ -54,7 +54,6 @@ CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_CRC32_SLICEBY4=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y


@ -78,12 +78,6 @@ static void __exit crc_t10dif_powerpc_exit(void)
}
module_exit(crc_t10dif_powerpc_exit);
bool crc_t10dif_is_optimized(void)
{
return static_key_enabled(&have_vec_crypto);
}
EXPORT_SYMBOL(crc_t10dif_is_optimized);
MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
MODULE_LICENSE("GPL");


@ -23,18 +23,18 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
}
EXPORT_SYMBOL(crc32_le_arch);
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
unsigned int prealign;
unsigned int tail;
if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) ||
!static_branch_likely(&have_vec_crypto) || !crypto_simd_usable())
return crc32c_le_base(crc, p, len);
return crc32c_base(crc, p, len);
if ((unsigned long)p & VMX_ALIGN_MASK) {
prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
crc = crc32c_le_base(crc, p, prealign);
crc = crc32c_base(crc, p, prealign);
len -= prealign;
p += prealign;
}
@ -52,12 +52,12 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
tail = len & VMX_ALIGN_MASK;
if (tail) {
p += len & ~VMX_ALIGN_MASK;
crc = crc32c_le_base(crc, p, tail);
crc = crc32c_base(crc, p, tail);
}
return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{


@ -25,6 +25,8 @@ config RISCV
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_BINFMT_FLAT
select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
select ARCH_HAS_CRC64 if 64BIT && RISCV_ISA_ZBC
select ARCH_HAS_CRC_T10DIF if RISCV_ISA_ZBC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE


@ -16,6 +16,11 @@ lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
crc32-riscv-y := crc32.o crc32_msb.o crc32_lsb.o
obj-$(CONFIG_CRC64_ARCH) += crc64-riscv.o
crc64-riscv-y := crc64.o crc64_msb.o crc64_lsb.o
obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-riscv.o
crc-t10dif-riscv-y := crc-t10dif.o crc16_msb.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o


@ -0,0 +1,122 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* CRC constants generated by:
*
* ./scripts/gen-crc-consts.py riscv_clmul crc16_msb_0x8bb7,crc32_msb_0x04c11db7,crc32_lsb_0xedb88320,crc32_lsb_0x82f63b78,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
*
* Do not edit manually.
*/
struct crc_clmul_consts {
unsigned long fold_across_2_longs_const_hi;
unsigned long fold_across_2_longs_const_lo;
unsigned long barrett_reduction_const_1;
unsigned long barrett_reduction_const_2;
};
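/*
 * Naming note: the hex value embedded in each *_consts name below is the
 * generator polynomial with its leading x^degree term dropped, bit-reversed
 * for the lsb-first (least-significant-bit-first) CRCs.
 */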
/*
* Constants generated for most-significant-bit-first CRC-16 using
* G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
*/
static const struct crc_clmul_consts crc16_msb_0x8bb7_consts __maybe_unused = {
#ifdef CONFIG_64BIT
.fold_across_2_longs_const_hi = 0x0000000000001faa, /* x^192 mod G */
.fold_across_2_longs_const_lo = 0x000000000000a010, /* x^128 mod G */
.barrett_reduction_const_1 = 0xfb2d2bfc0e99d245, /* floor(x^79 / G) */
.barrett_reduction_const_2 = 0x0000000000008bb7, /* G - x^16 */
#else
.fold_across_2_longs_const_hi = 0x00005890, /* x^96 mod G */
.fold_across_2_longs_const_lo = 0x0000f249, /* x^64 mod G */
.barrett_reduction_const_1 = 0xfb2d2bfc, /* floor(x^47 / G) */
.barrett_reduction_const_2 = 0x00008bb7, /* G - x^16 */
#endif
};
/*
* Constants generated for most-significant-bit-first CRC-32 using
* G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
* x^5 + x^4 + x^2 + x^1 + x^0
*/
static const struct crc_clmul_consts crc32_msb_0x04c11db7_consts __maybe_unused = {
#ifdef CONFIG_64BIT
.fold_across_2_longs_const_hi = 0x00000000c5b9cd4c, /* x^192 mod G */
.fold_across_2_longs_const_lo = 0x00000000e8a45605, /* x^128 mod G */
.barrett_reduction_const_1 = 0x826880efa40da72d, /* floor(x^95 / G) */
.barrett_reduction_const_2 = 0x0000000004c11db7, /* G - x^32 */
#else
.fold_across_2_longs_const_hi = 0xf200aa66, /* x^96 mod G */
.fold_across_2_longs_const_lo = 0x490d678d, /* x^64 mod G */
.barrett_reduction_const_1 = 0x826880ef, /* floor(x^63 / G) */
.barrett_reduction_const_2 = 0x04c11db7, /* G - x^32 */
#endif
};
/*
* Constants generated for least-significant-bit-first CRC-32 using
* G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
* x^5 + x^4 + x^2 + x^1 + x^0
*/
static const struct crc_clmul_consts crc32_lsb_0xedb88320_consts __maybe_unused = {
#ifdef CONFIG_64BIT
.fold_across_2_longs_const_hi = 0x65673b4600000000, /* x^191 mod G */
.fold_across_2_longs_const_lo = 0x9ba54c6f00000000, /* x^127 mod G */
.barrett_reduction_const_1 = 0xb4e5b025f7011641, /* floor(x^95 / G) */
.barrett_reduction_const_2 = 0x00000000edb88320, /* (G - x^32) * x^32 */
#else
.fold_across_2_longs_const_hi = 0xccaa009e, /* x^95 mod G */
.fold_across_2_longs_const_lo = 0xb8bc6765, /* x^63 mod G */
.barrett_reduction_const_1 = 0xf7011641, /* floor(x^63 / G) */
.barrett_reduction_const_2 = 0xedb88320, /* (G - x^32) * x^0 */
#endif
};
/*
* Constants generated for least-significant-bit-first CRC-32 using
* G(x) = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 +
* x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0
*/
static const struct crc_clmul_consts crc32_lsb_0x82f63b78_consts __maybe_unused = {
#ifdef CONFIG_64BIT
.fold_across_2_longs_const_hi = 0x3743f7bd00000000, /* x^191 mod G */
.fold_across_2_longs_const_lo = 0x3171d43000000000, /* x^127 mod G */
.barrett_reduction_const_1 = 0x4869ec38dea713f1, /* floor(x^95 / G) */
.barrett_reduction_const_2 = 0x0000000082f63b78, /* (G - x^32) * x^32 */
#else
.fold_across_2_longs_const_hi = 0x493c7d27, /* x^95 mod G */
.fold_across_2_longs_const_lo = 0xdd45aab8, /* x^63 mod G */
.barrett_reduction_const_1 = 0xdea713f1, /* floor(x^63 / G) */
.barrett_reduction_const_2 = 0x82f63b78, /* (G - x^32) * x^0 */
#endif
};
/*
* Constants generated for most-significant-bit-first CRC-64 using
* G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
* x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
* x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
* x^7 + x^4 + x^1 + x^0
*/
#ifdef CONFIG_64BIT
static const struct crc_clmul_consts crc64_msb_0x42f0e1eba9ea3693_consts __maybe_unused = {
.fold_across_2_longs_const_hi = 0x4eb938a7d257740e, /* x^192 mod G */
.fold_across_2_longs_const_lo = 0x05f5c3c7eb52fab6, /* x^128 mod G */
.barrett_reduction_const_1 = 0xabc694e836627c39, /* floor(x^127 / G) */
.barrett_reduction_const_2 = 0x42f0e1eba9ea3693, /* G - x^64 */
};
#endif
/*
* Constants generated for least-significant-bit-first CRC-64 using
* G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
* x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
* x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
* x^4 + x^3 + x^0
*/
#ifdef CONFIG_64BIT
static const struct crc_clmul_consts crc64_lsb_0x9a6c9329ac4bc9b5_consts __maybe_unused = {
.fold_across_2_longs_const_hi = 0xeadc41fd2ba3d420, /* x^191 mod G */
.fold_across_2_longs_const_lo = 0x21e9761e252621ac, /* x^127 mod G */
.barrett_reduction_const_1 = 0x27ecfa329aef9f77, /* floor(x^127 / G) */
.barrett_reduction_const_2 = 0x9a6c9329ac4bc9b5, /* (G - x^64) * x^0 */
};
#endif


@ -0,0 +1,265 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright 2025 Google LLC */
/*
* This file is a "template" that generates a CRC function optimized using the
* RISC-V Zbc (scalar carryless multiplication) extension. The includer of this
* file must define the following parameters to specify the type of CRC:
*
* crc_t: the data type of the CRC, e.g. u32 for a 32-bit CRC
* LSB_CRC: 0 for a msb (most-significant-bit) first CRC, i.e. natural
* mapping between bits and polynomial coefficients
* 1 for a lsb (least-significant-bit) first CRC, i.e. reflected
* mapping between bits and polynomial coefficients
*/
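/*
 * For example (mirroring crc16_msb.c), a 16-bit msb-first CRC function is
 * generated by including this file as follows:
 *
 *	typedef u16 crc_t;
 *	#define LSB_CRC 0
 *	#include "crc-clmul-template.h"
 */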
#include <asm/byteorder.h>
#include <linux/minmax.h>
#define CRC_BITS (8 * sizeof(crc_t)) /* a.k.a. 'n' */
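/*
 * Zbc scalar carryless-multiply helpers. Per the RISC-V Zbc definition, clmul
 * returns the low XLEN bits of the carryless product, clmulh returns the high
 * XLEN bits, and clmulr returns bits 2*XLEN-2 through XLEN-1 (the "reversed"
 * variant).
 */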
static inline unsigned long clmul(unsigned long a, unsigned long b)
{
unsigned long res;
asm(".option push\n"
".option arch,+zbc\n"
"clmul %0, %1, %2\n"
".option pop\n"
: "=r" (res) : "r" (a), "r" (b));
return res;
}
static inline unsigned long clmulh(unsigned long a, unsigned long b)
{
unsigned long res;
asm(".option push\n"
".option arch,+zbc\n"
"clmulh %0, %1, %2\n"
".option pop\n"
: "=r" (res) : "r" (a), "r" (b));
return res;
}
static inline unsigned long clmulr(unsigned long a, unsigned long b)
{
unsigned long res;
asm(".option push\n"
".option arch,+zbc\n"
"clmulr %0, %1, %2\n"
".option pop\n"
: "=r" (res) : "r" (a), "r" (b));
return res;
}
/*
* crc_load_long() loads one "unsigned long" of aligned data bytes, producing a
* polynomial whose bit order matches the CRC's bit order.
*/
#ifdef CONFIG_64BIT
# if LSB_CRC
# define crc_load_long(x) le64_to_cpup(x)
# else
# define crc_load_long(x) be64_to_cpup(x)
# endif
#else
# if LSB_CRC
# define crc_load_long(x) le32_to_cpup(x)
# else
# define crc_load_long(x) be32_to_cpup(x)
# endif
#endif
/* XOR @crc into the end of @msgpoly that represents the high-order terms. */
static inline unsigned long
crc_clmul_prep(crc_t crc, unsigned long msgpoly)
{
#if LSB_CRC
return msgpoly ^ crc;
#else
return msgpoly ^ ((unsigned long)crc << (BITS_PER_LONG - CRC_BITS));
#endif
}
/*
* Multiply the long-sized @msgpoly by x^n (a.k.a. x^CRC_BITS) and reduce it
* modulo the generator polynomial G. This gives the CRC of @msgpoly.
*/
static inline crc_t
crc_clmul_long(unsigned long msgpoly, const struct crc_clmul_consts *consts)
{
unsigned long tmp;
/*
* First step of Barrett reduction with integrated multiplication by
* x^n: calculate floor((msgpoly * x^n) / G). This is the value by
* which G needs to be multiplied to cancel out the x^n and higher terms
* of msgpoly * x^n. Do it using the following formula:
*
* msb-first:
* floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G)) / x^(BITS_PER_LONG-1))
* lsb-first:
* floor((msgpoly * floor(x^(BITS_PER_LONG-1+n) / G) * x) / x^BITS_PER_LONG)
*
* barrett_reduction_const_1 contains floor(x^(BITS_PER_LONG-1+n) / G),
* which fits a long exactly. Using any lower power of x there would
* not carry enough precision through the calculation, while using any
* higher power of x would require extra instructions to handle a wider
* multiplication. In the msb-first case, using this power of x results
* in needing a floored division by x^(BITS_PER_LONG-1), which matches
* what clmulr produces. In the lsb-first case, a factor of x gets
* implicitly introduced by each carryless multiplication (shown as
* '* x' above), and the floored division instead needs to be by
* x^BITS_PER_LONG which matches what clmul produces.
*/
#if LSB_CRC
tmp = clmul(msgpoly, consts->barrett_reduction_const_1);
#else
tmp = clmulr(msgpoly, consts->barrett_reduction_const_1);
#endif
/*
* Second step of Barrett reduction:
*
* crc := (msgpoly * x^n) + (G * floor((msgpoly * x^n) / G))
*
* This reduces (msgpoly * x^n) modulo G by adding the appropriate
* multiple of G to it. The result uses only the x^0..x^(n-1) terms.
* HOWEVER, since the unreduced value (msgpoly * x^n) is zero in those
* terms in the first place, it is more efficient to do the equivalent:
*
* crc := ((G - x^n) * floor((msgpoly * x^n) / G)) mod x^n
*
* In the lsb-first case further modify it to the following which avoids
* a shift, as the crc ends up in the physically low n bits from clmulr:
*
* product := ((G - x^n) * x^(BITS_PER_LONG - n)) * floor((msgpoly * x^n) / G) * x
* crc := floor(product / x^(BITS_PER_LONG + 1 - n)) mod x^n
*
* barrett_reduction_const_2 contains the constant multiplier (G - x^n)
* or (G - x^n) * x^(BITS_PER_LONG - n) from the formulas above. The
* cast of the result to crc_t is essential, as it applies the mod x^n!
*/
#if LSB_CRC
return clmulr(tmp, consts->barrett_reduction_const_2);
#else
return clmul(tmp, consts->barrett_reduction_const_2);
#endif
}
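/*
 * Restated compactly, the two steps above compute:
 *
 *	q   := floor((msgpoly * x^n) / G)
 *	crc := ((G - x^n) * q) mod x^n  =  (msgpoly * x^n) mod G
 */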
/* Update @crc with the data from @msgpoly. */
static inline crc_t
crc_clmul_update_long(crc_t crc, unsigned long msgpoly,
const struct crc_clmul_consts *consts)
{
return crc_clmul_long(crc_clmul_prep(crc, msgpoly), consts);
}
/* Update @crc with 1 <= @len < sizeof(unsigned long) bytes of data. */
static inline crc_t
crc_clmul_update_partial(crc_t crc, const u8 *p, size_t len,
const struct crc_clmul_consts *consts)
{
unsigned long msgpoly;
size_t i;
#if LSB_CRC
msgpoly = (unsigned long)p[0] << (BITS_PER_LONG - 8);
for (i = 1; i < len; i++)
msgpoly = (msgpoly >> 8) ^ ((unsigned long)p[i] << (BITS_PER_LONG - 8));
#else
msgpoly = p[0];
for (i = 1; i < len; i++)
msgpoly = (msgpoly << 8) ^ p[i];
#endif
if (len >= sizeof(crc_t)) {
#if LSB_CRC
msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
#else
msgpoly ^= (unsigned long)crc << (8*len - CRC_BITS);
#endif
return crc_clmul_long(msgpoly, consts);
}
#if LSB_CRC
msgpoly ^= (unsigned long)crc << (BITS_PER_LONG - 8*len);
return crc_clmul_long(msgpoly, consts) ^ (crc >> (8*len));
#else
msgpoly ^= crc >> (CRC_BITS - 8*len);
return crc_clmul_long(msgpoly, consts) ^ (crc << (8*len));
#endif
}
static inline crc_t
crc_clmul(crc_t crc, const void *p, size_t len,
const struct crc_clmul_consts *consts)
{
size_t align;
/* This implementation assumes that the CRC fits in an unsigned long. */
BUILD_BUG_ON(sizeof(crc_t) > sizeof(unsigned long));
/* If the buffer is not long-aligned, align it. */
align = (unsigned long)p % sizeof(unsigned long);
if (align && len) {
align = min(sizeof(unsigned long) - align, len);
crc = crc_clmul_update_partial(crc, p, align, consts);
p += align;
len -= align;
}
if (len >= 4 * sizeof(unsigned long)) {
unsigned long m0, m1;
m0 = crc_clmul_prep(crc, crc_load_long(p));
m1 = crc_load_long(p + sizeof(unsigned long));
p += 2 * sizeof(unsigned long);
len -= 2 * sizeof(unsigned long);
/*
* Main loop. Each iteration starts with a message polynomial
* (x^BITS_PER_LONG)*m0 + m1, then logically extends it by two
* more longs of data to form x^(3*BITS_PER_LONG)*m0 +
* x^(2*BITS_PER_LONG)*m1 + x^BITS_PER_LONG*m2 + m3, then
* "folds" that back into a congruent (modulo G) value that uses
* just m0 and m1 again. This is done by multiplying m0 by the
* precomputed constant (x^(3*BITS_PER_LONG) mod G) and m1 by
* the precomputed constant (x^(2*BITS_PER_LONG) mod G), then
* adding the results to m2 and m3 as appropriate. Each such
* multiplication produces a result twice the length of a long,
* which in RISC-V is two instructions clmul and clmulh.
*
* This could be changed to fold across more than 2 longs at a
* time if there is a CPU that can take advantage of it.
*/
do {
unsigned long p0, p1, p2, p3;
p0 = clmulh(m0, consts->fold_across_2_longs_const_hi);
p1 = clmul(m0, consts->fold_across_2_longs_const_hi);
p2 = clmulh(m1, consts->fold_across_2_longs_const_lo);
p3 = clmul(m1, consts->fold_across_2_longs_const_lo);
m0 = (LSB_CRC ? p1 ^ p3 : p0 ^ p2) ^ crc_load_long(p);
m1 = (LSB_CRC ? p0 ^ p2 : p1 ^ p3) ^
crc_load_long(p + sizeof(unsigned long));
p += 2 * sizeof(unsigned long);
len -= 2 * sizeof(unsigned long);
} while (len >= 2 * sizeof(unsigned long));
crc = crc_clmul_long(m0, consts);
crc = crc_clmul_update_long(crc, m1, consts);
}
while (len >= sizeof(unsigned long)) {
crc = crc_clmul_update_long(crc, crc_load_long(p), consts);
p += sizeof(unsigned long);
len -= sizeof(unsigned long);
}
if (len)
crc = crc_clmul_update_partial(crc, p, len, consts);
return crc;
}


@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Copyright 2025 Google LLC */
#ifndef _RISCV_CRC_CLMUL_H
#define _RISCV_CRC_CLMUL_H
#include <linux/types.h>
#include "crc-clmul-consts.h"
u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts);
u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts);
u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts);
#ifdef CONFIG_64BIT
u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts);
u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts);
#endif
#endif /* _RISCV_CRC_CLMUL_H */


@ -0,0 +1,24 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized CRC-T10DIF function
*
* Copyright 2025 Google LLC
*/
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <linux/crc-t10dif.h>
#include <linux/module.h>
#include "crc-clmul.h"
u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return crc16_msb_clmul(crc, p, len, &crc16_msb_0x8bb7_consts);
return crc_t10dif_generic(crc, p, len);
}
EXPORT_SYMBOL(crc_t10dif_arch);
MODULE_DESCRIPTION("RISC-V optimized CRC-T10DIF function");
MODULE_LICENSE("GPL");


@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized most-significant-bit-first CRC16
*
* Copyright 2025 Google LLC
*/
#include "crc-clmul.h"
typedef u16 crc_t;
#define LSB_CRC 0
#include "crc-clmul-template.h"
u16 crc16_msb_clmul(u16 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts)
{
return crc_clmul(crc, p, len, consts);
}


@ -1,311 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Accelerated CRC32 implementation with Zbc extension.
*
* Copyright (C) 2024 Intel Corporation
*/
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <asm/byteorder.h>
#include <linux/types.h>
#include <linux/minmax.h>
#include <linux/crc32poly.h>
#include <linux/crc32.h>
#include <linux/byteorder/generic.h>
#include <linux/module.h>
/*
* Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
* better understanding of how this math works.
*
* let "+" denotes polynomial add (XOR)
* let "-" denotes polynomial sub (XOR)
* let "*" denotes polynomial multiplication
* let "/" denotes polynomial floor division
* let "S" denotes source data, XLEN bit wide
* let "P" denotes CRC32 polynomial
* let "T" denotes 2^(XLEN+32)
* let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
*
* crc32(S, P)
* => S * (2^32) - S * (2^32) / P * P
* => lowest 32 bits of: S * (2^32) / P * P
* => lowest 32 bits of: S * (2^32) * (T / P) / T * P
* => lowest 32 bits of: S * (2^32) * quotient / T * P
* => lowest 32 bits of: S * quotient / 2^XLEN * P
* => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
* => clmul_low_part(clmul_high_part(S, QT) + S, P)
*
* In terms of below implementations, the BE case is more intuitive, since the
* higher order bit sits at more significant position.
*/
#if __riscv_xlen == 64
/* Slide by XLEN bits per iteration */
# define STEP_ORDER 3
/* Each below polynomial quotient has an implicit bit for 2^XLEN */
/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
# define CRC32_POLY_QT_LE 0x5a72d812fb808b20
/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
* the same as the bit-reversed version of CRC32_POLY_QT_LE
*/
# define CRC32_POLY_QT_BE 0x04d101df481b4e5a
static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
}
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
u32 crc;
/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
asm volatile (".option push\n"
".option arch,+zbc\n"
"clmul %0, %1, %2\n"
"slli %0, %0, 1\n"
"xor %0, %0, %1\n"
"clmulr %0, %0, %3\n"
"srli %0, %0, 32\n"
".option pop\n"
: "=&r" (crc)
: "r" (s),
"r" (poly_qt),
"r" ((u64)poly << 32)
:);
return crc;
}
static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
}
#elif __riscv_xlen == 32
# define STEP_ORDER 2
/* Each quotient should match the upper half of its analog in RV64 */
# define CRC32_POLY_QT_LE 0xfb808b20
# define CRC32C_POLY_QT_LE 0x6f5389f8
# define CRC32_POLY_QT_BE 0x04d101df
static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
{
return crc ^ (__force u32)__cpu_to_le32(*ptr);
}
static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
{
u32 crc;
/* We don't have a "clmulrh" insn, so use clmul + slli instead. */
asm volatile (".option push\n"
".option arch,+zbc\n"
"clmul %0, %1, %2\n"
"slli %0, %0, 1\n"
"xor %0, %0, %1\n"
"clmulr %0, %0, %3\n"
".option pop\n"
: "=&r" (crc)
: "r" (s),
"r" (poly_qt),
"r" (poly)
:);
return crc;
}
static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
{
return crc ^ (__force u32)__cpu_to_be32(*ptr);
}
#else
# error "Unexpected __riscv_xlen"
#endif
static inline u32 crc32_be_zbc(unsigned long s)
{
u32 crc;
asm volatile (".option push\n"
".option arch,+zbc\n"
"clmulh %0, %1, %2\n"
"xor %0, %0, %1\n"
"clmul %0, %0, %3\n"
".option pop\n"
: "=&r" (crc)
: "r" (s),
"r" (CRC32_POLY_QT_BE),
"r" (CRC32_POLY_BE)
:);
return crc;
}
#define STEP (1 << STEP_ORDER)
#define OFFSET_MASK (STEP - 1)
typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
size_t len, u32 poly,
unsigned long poly_qt)
{
size_t bits = len * 8;
unsigned long s = 0;
u32 crc_low = 0;
for (int i = 0; i < len; i++)
s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
s ^= (unsigned long)crc << (__riscv_xlen - bits);
if (__riscv_xlen == 32 || len < sizeof(u32))
crc_low = crc >> bits;
crc = crc32_le_zbc(s, poly, poly_qt);
crc ^= crc_low;
return crc;
}
static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
size_t len, u32 poly,
unsigned long poly_qt,
fallback crc_fb)
{
size_t offset, head_len, tail_len;
unsigned long const *p_ul;
unsigned long s;
asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBC, 1)
: : : : legacy);
/* Handle the unaligned head. */
offset = (unsigned long)p & OFFSET_MASK;
if (offset && len) {
head_len = min(STEP - offset, len);
crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
p += head_len;
len -= head_len;
}
tail_len = len & OFFSET_MASK;
len = len >> STEP_ORDER;
p_ul = (unsigned long const *)p;
for (int i = 0; i < len; i++) {
s = crc32_le_prep(crc, p_ul);
crc = crc32_le_zbc(s, poly, poly_qt);
p_ul++;
}
/* Handle the tail bytes. */
p = (unsigned char const *)p_ul;
if (tail_len)
crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
return crc;
legacy:
return crc_fb(crc, p, len);
}
u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
crc32_le_base);
}
EXPORT_SYMBOL(crc32_le_arch);
u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
CRC32C_POLY_QT_LE, crc32c_le_base);
}
EXPORT_SYMBOL(crc32c_le_arch);
static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
size_t len)
{
size_t bits = len * 8;
unsigned long s = 0;
u32 crc_low = 0;
s = 0;
for (int i = 0; i < len; i++)
s = *p++ | (s << 8);
if (__riscv_xlen == 32 || len < sizeof(u32)) {
s ^= crc >> (32 - bits);
crc_low = crc << bits;
} else {
s ^= (unsigned long)crc << (bits - 32);
}
crc = crc32_be_zbc(s);
crc ^= crc_low;
return crc;
}
u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
size_t offset, head_len, tail_len;
unsigned long const *p_ul;
unsigned long s;
asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
RISCV_ISA_EXT_ZBC, 1)
: : : : legacy);
/* Handle the unaligned head. */
offset = (unsigned long)p & OFFSET_MASK;
if (offset && len) {
head_len = min(STEP - offset, len);
crc = crc32_be_unaligned(crc, p, head_len);
p += head_len;
len -= head_len;
}
tail_len = len & OFFSET_MASK;
len = len >> STEP_ORDER;
p_ul = (unsigned long const *)p;
for (int i = 0; i < len; i++) {
s = crc32_be_prep(crc, p_ul);
crc = crc32_be_zbc(s);
p_ul++;
}
/* Handle the tail bytes. */
p = (unsigned char const *)p_ul;
if (tail_len)
crc = crc32_be_unaligned(crc, p, tail_len);
return crc;
legacy:
return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);
u32 crc32_optimizations(void)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return CRC32_LE_OPTIMIZATION |
CRC32_BE_OPTIMIZATION |
CRC32C_OPTIMIZATION;
return 0;
}
EXPORT_SYMBOL(crc32_optimizations);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");

arch/riscv/lib/crc32.c Normal file

@ -0,0 +1,53 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized CRC32 functions
*
* Copyright 2025 Google LLC
*/
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <linux/crc32.h>
#include <linux/module.h>
#include "crc-clmul.h"
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return crc32_lsb_clmul(crc, p, len,
&crc32_lsb_0xedb88320_consts);
return crc32_le_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_le_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return crc32_msb_clmul(crc, p, len,
&crc32_msb_0x04c11db7_consts);
return crc32_be_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_be_arch);
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return crc32_lsb_clmul(crc, p, len,
&crc32_lsb_0x82f63b78_consts);
return crc32c_base(crc, p, len);
}
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_optimizations(void)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return CRC32_LE_OPTIMIZATION |
CRC32_BE_OPTIMIZATION |
CRC32C_OPTIMIZATION;
return 0;
}
EXPORT_SYMBOL(crc32_optimizations);
MODULE_DESCRIPTION("RISC-V optimized CRC32 functions");
MODULE_LICENSE("GPL");


@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized least-significant-bit-first CRC32
*
* Copyright 2025 Google LLC
*/
#include "crc-clmul.h"
typedef u32 crc_t;
#define LSB_CRC 1
#include "crc-clmul-template.h"
u32 crc32_lsb_clmul(u32 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts)
{
return crc_clmul(crc, p, len, consts);
}


@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized most-significant-bit-first CRC32
*
* Copyright 2025 Google LLC
*/
#include "crc-clmul.h"
typedef u32 crc_t;
#define LSB_CRC 0
#include "crc-clmul-template.h"
u32 crc32_msb_clmul(u32 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts)
{
return crc_clmul(crc, p, len, consts);
}

arch/riscv/lib/crc64.c Normal file

@ -0,0 +1,34 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized CRC64 functions
*
* Copyright 2025 Google LLC
*/
#include <asm/hwcap.h>
#include <asm/alternative-macros.h>
#include <linux/crc64.h>
#include <linux/module.h>
#include "crc-clmul.h"
u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return crc64_msb_clmul(crc, p, len,
&crc64_msb_0x42f0e1eba9ea3693_consts);
return crc64_be_generic(crc, p, len);
}
EXPORT_SYMBOL(crc64_be_arch);
u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
{
if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC))
return crc64_lsb_clmul(crc, p, len,
&crc64_lsb_0x9a6c9329ac4bc9b5_consts);
return crc64_nvme_generic(crc, p, len);
}
EXPORT_SYMBOL(crc64_nvme_arch);
MODULE_DESCRIPTION("RISC-V optimized CRC64 functions");
MODULE_LICENSE("GPL");


@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized least-significant-bit-first CRC64
*
* Copyright 2025 Google LLC
*/
#include "crc-clmul.h"
typedef u64 crc_t;
#define LSB_CRC 1
#include "crc-clmul-template.h"
u64 crc64_lsb_clmul(u64 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts)
{
return crc_clmul(crc, p, len, consts);
}


@ -0,0 +1,18 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* RISC-V optimized most-significant-bit-first CRC64
*
* Copyright 2025 Google LLC
*/
#include "crc-clmul.h"
typedef u64 crc_t;
#define LSB_CRC 0
#include "crc-clmul-template.h"
u64 crc64_msb_clmul(u64 crc, const void *p, size_t len,
const struct crc_clmul_consts *consts)
{
return crc_clmul(crc, p, len, consts);
}


@ -815,9 +815,6 @@ CONFIG_SYSTEM_BLACKLIST_KEYRING=y
CONFIG_CORDIC=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
CONFIG_RANDOM32_SELFTEST=y
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y


@ -803,9 +803,6 @@ CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
CONFIG_CRYPTO_LIB_CURVE25519=m
CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m
CONFIG_CRC4=m
CONFIG_CRC7=m
CONFIG_CRC8=m
CONFIG_XZ_DEC_MICROLZMA=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0


@ -62,7 +62,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs);
DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base)
DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
static int __init crc32_s390_init(void)
{


@ -104,5 +104,3 @@ CONFIG_CRYPTO_LZO=y
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_CRC_ITU_T=y
CONFIG_CRC7=y
CONFIG_LIBCRC32C=y


@ -195,4 +195,3 @@ CONFIG_CRYPTO_LZO=y
# CONFIG_CRYPTO_HW is not set
CONFIG_CRC_CCITT=y
CONFIG_CRC16=y
CONFIG_LIBCRC32C=y


@ -266,4 +266,3 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC16=m
CONFIG_LIBCRC32C=m


@ -94,4 +94,3 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TWOFISH=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
CONFIG_LIBCRC32C=m


@ -230,7 +230,6 @@ CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRC16=m
CONFIG_LIBCRC32C=m
CONFIG_VCC=m
CONFIG_PATA_CMD64X=y
CONFIG_IP_PNP=y


@ -27,17 +27,17 @@ EXPORT_SYMBOL(crc32_le_arch);
void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len);
u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
u32 crc32c_arch(u32 crc, const u8 *data, size_t len)
{
size_t n = -(uintptr_t)data & 7;
if (!static_branch_likely(&have_crc32c_opcode))
return crc32c_le_base(crc, data, len);
return crc32c_base(crc, data, len);
if (n) {
/* Data isn't 8-byte aligned. Align it. */
n = min(n, len);
crc = crc32c_le_base(crc, data, n);
crc = crc32c_base(crc, data, n);
data += n;
len -= n;
}
@ -48,10 +48,10 @@ u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
len -= n;
}
if (len)
crc = crc32c_le_base(crc, data, len);
crc = crc32c_base(crc, data, len);
return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *data, size_t len)
{


@ -77,7 +77,8 @@ config X86
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_CPU_PASID if IOMMU_SVA
select ARCH_HAS_CRC32
select ARCH_HAS_CRC_T10DIF if X86_64
select ARCH_HAS_CRC64 if X86_64
select ARCH_HAS_CRC_T10DIF
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE


@ -1536,26 +1536,6 @@ DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
AES_GCM_KEY_AVX10_SIZE, 800);
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
/*
* This is a list of CPU models that are known to suffer from downclocking when
* zmm registers (512-bit vectors) are used. On these CPUs, the AES mode
* implementations with zmm registers won't be used by default. Implementations
* with ymm registers (256-bit vectors) will be used by default instead.
*/
static const struct x86_cpu_id zmm_exclusion_list[] = {
X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
X86_MATCH_VFM(INTEL_ICELAKE, 0),
X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
{},
};
static int __init register_avx_algs(void)
{
int err;
@ -1600,7 +1580,7 @@ static int __init register_avx_algs(void)
if (err)
return err;
if (x86_match_cpu(zmm_exclusion_list)) {
if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
int i;
aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;


@ -480,6 +480,7 @@
#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)


@ -512,6 +512,25 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
/*
* This is a list of Intel CPUs that are known to suffer from downclocking when
* ZMM registers (512-bit vectors) are used. On these CPUs, when the kernel
* executes SIMD-optimized code such as cryptography functions or CRCs, it
* should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
*/
static const struct x86_cpu_id zmm_exclusion_list[] = {
X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
X86_MATCH_VFM(INTEL_ICELAKE, 0),
X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
{},
};
static void init_intel(struct cpuinfo_x86 *c)
{
early_init_intel(c);
@ -590,6 +609,9 @@ static void init_intel(struct cpuinfo_x86 *c)
}
#endif
if (x86_match_cpu(zmm_exclusion_list))
set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
/* Work around errata */
srat_detect_node(c);


@ -42,8 +42,11 @@ obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
crc32-x86-y := crc32-glue.o crc32-pclmul.o
crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o
crc64-x86-y := crc64-glue.o crc64-pclmul.o
obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o
crc-t10dif-x86-y := crc-t10dif-glue.o crct10dif-pcl-asm_64.o
crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o
obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
obj-y += iomem.o


@ -0,0 +1,195 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* CRC constants generated by:
*
* ./scripts/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
*
* Do not edit manually.
*/
/*
* CRC folding constants generated for most-significant-bit-first CRC-16 using
* G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
*/
static const struct {
u8 bswap_mask[16];
u64 fold_across_2048_bits_consts[2];
u64 fold_across_1024_bits_consts[2];
u64 fold_across_512_bits_consts[2];
u64 fold_across_256_bits_consts[2];
u64 fold_across_128_bits_consts[2];
u8 shuf_table[48];
u64 barrett_reduction_consts[2];
} crc16_msb_0x8bb7_consts ____cacheline_aligned __maybe_unused = {
.bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
.fold_across_2048_bits_consts = {
0xdccf000000000000, /* LO64_TERMS: (x^2000 mod G) * x^48 */
0x4b0b000000000000, /* HI64_TERMS: (x^2064 mod G) * x^48 */
},
.fold_across_1024_bits_consts = {
0x9d9d000000000000, /* LO64_TERMS: (x^976 mod G) * x^48 */
0x7cf5000000000000, /* HI64_TERMS: (x^1040 mod G) * x^48 */
},
.fold_across_512_bits_consts = {
0x044c000000000000, /* LO64_TERMS: (x^464 mod G) * x^48 */
0xe658000000000000, /* HI64_TERMS: (x^528 mod G) * x^48 */
},
.fold_across_256_bits_consts = {
0x6ee3000000000000, /* LO64_TERMS: (x^208 mod G) * x^48 */
0xe7b5000000000000, /* HI64_TERMS: (x^272 mod G) * x^48 */
},
.fold_across_128_bits_consts = {
0x2d56000000000000, /* LO64_TERMS: (x^80 mod G) * x^48 */
0x06df000000000000, /* HI64_TERMS: (x^144 mod G) * x^48 */
},
.shuf_table = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
},
.barrett_reduction_consts = {
0x8bb7000000000000, /* LO64_TERMS: (G - x^16) * x^48 */
0xf65a57f81d33a48a, /* HI64_TERMS: (floor(x^79 / G) * x) - x^64 */
},
};
/*
* CRC folding constants generated for least-significant-bit-first CRC-32 using
* G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
* x^5 + x^4 + x^2 + x^1 + x^0
*/
static const struct {
u64 fold_across_2048_bits_consts[2];
u64 fold_across_1024_bits_consts[2];
u64 fold_across_512_bits_consts[2];
u64 fold_across_256_bits_consts[2];
u64 fold_across_128_bits_consts[2];
u8 shuf_table[48];
u64 barrett_reduction_consts[2];
} crc32_lsb_0xedb88320_consts ____cacheline_aligned __maybe_unused = {
.fold_across_2048_bits_consts = {
0x00000000ce3371cb, /* HI64_TERMS: (x^2079 mod G) * x^32 */
0x00000000e95c1271, /* LO64_TERMS: (x^2015 mod G) * x^32 */
},
.fold_across_1024_bits_consts = {
0x0000000033fff533, /* HI64_TERMS: (x^1055 mod G) * x^32 */
0x00000000910eeec1, /* LO64_TERMS: (x^991 mod G) * x^32 */
},
.fold_across_512_bits_consts = {
0x000000008f352d95, /* HI64_TERMS: (x^543 mod G) * x^32 */
0x000000001d9513d7, /* LO64_TERMS: (x^479 mod G) * x^32 */
},
.fold_across_256_bits_consts = {
0x00000000f1da05aa, /* HI64_TERMS: (x^287 mod G) * x^32 */
0x0000000081256527, /* LO64_TERMS: (x^223 mod G) * x^32 */
},
.fold_across_128_bits_consts = {
0x00000000ae689191, /* HI64_TERMS: (x^159 mod G) * x^32 */
0x00000000ccaa009e, /* LO64_TERMS: (x^95 mod G) * x^32 */
},
.shuf_table = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
},
.barrett_reduction_consts = {
0xb4e5b025f7011641, /* HI64_TERMS: floor(x^95 / G) */
0x00000001db710640, /* LO64_TERMS: (G - x^32) * x^31 */
},
};
/*
* CRC folding constants generated for most-significant-bit-first CRC-64 using
* G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
* x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
* x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
* x^7 + x^4 + x^1 + x^0
*/
static const struct {
u8 bswap_mask[16];
u64 fold_across_2048_bits_consts[2];
u64 fold_across_1024_bits_consts[2];
u64 fold_across_512_bits_consts[2];
u64 fold_across_256_bits_consts[2];
u64 fold_across_128_bits_consts[2];
u8 shuf_table[48];
u64 barrett_reduction_consts[2];
} crc64_msb_0x42f0e1eba9ea3693_consts ____cacheline_aligned __maybe_unused = {
.bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
.fold_across_2048_bits_consts = {
0x7f52691a60ddc70d, /* LO64_TERMS: (x^2048 mod G) * x^0 */
0x7036b0389f6a0c82, /* HI64_TERMS: (x^2112 mod G) * x^0 */
},
.fold_across_1024_bits_consts = {
0x05cf79dea9ac37d6, /* LO64_TERMS: (x^1024 mod G) * x^0 */
0x001067e571d7d5c2, /* HI64_TERMS: (x^1088 mod G) * x^0 */
},
.fold_across_512_bits_consts = {
0x5f6843ca540df020, /* LO64_TERMS: (x^512 mod G) * x^0 */
0xddf4b6981205b83f, /* HI64_TERMS: (x^576 mod G) * x^0 */
},
.fold_across_256_bits_consts = {
0x571bee0a227ef92b, /* LO64_TERMS: (x^256 mod G) * x^0 */
0x44bef2a201b5200c, /* HI64_TERMS: (x^320 mod G) * x^0 */
},
.fold_across_128_bits_consts = {
0x05f5c3c7eb52fab6, /* LO64_TERMS: (x^128 mod G) * x^0 */
0x4eb938a7d257740e, /* HI64_TERMS: (x^192 mod G) * x^0 */
},
.shuf_table = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
},
.barrett_reduction_consts = {
0x42f0e1eba9ea3693, /* LO64_TERMS: (G - x^64) * x^0 */
0x578d29d06cc4f872, /* HI64_TERMS: (floor(x^127 / G) * x) - x^64 */
},
};
/*
* CRC folding constants generated for least-significant-bit-first CRC-64 using
* G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
* x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
* x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
* x^4 + x^3 + x^0
*/
static const struct {
u64 fold_across_2048_bits_consts[2];
u64 fold_across_1024_bits_consts[2];
u64 fold_across_512_bits_consts[2];
u64 fold_across_256_bits_consts[2];
u64 fold_across_128_bits_consts[2];
u8 shuf_table[48];
u64 barrett_reduction_consts[2];
} crc64_lsb_0x9a6c9329ac4bc9b5_consts ____cacheline_aligned __maybe_unused = {
.fold_across_2048_bits_consts = {
0x37ccd3e14069cabc, /* HI64_TERMS: (x^2111 mod G) * x^0 */
0xa043808c0f782663, /* LO64_TERMS: (x^2047 mod G) * x^0 */
},
.fold_across_1024_bits_consts = {
0xa1ca681e733f9c40, /* HI64_TERMS: (x^1087 mod G) * x^0 */
0x5f852fb61e8d92dc, /* LO64_TERMS: (x^1023 mod G) * x^0 */
},
.fold_across_512_bits_consts = {
0x0c32cdb31e18a84a, /* HI64_TERMS: (x^575 mod G) * x^0 */
0x62242240ace5045a, /* LO64_TERMS: (x^511 mod G) * x^0 */
},
.fold_across_256_bits_consts = {
0xb0bc2e589204f500, /* HI64_TERMS: (x^319 mod G) * x^0 */
0xe1e0bb9d45d7a44c, /* LO64_TERMS: (x^255 mod G) * x^0 */
},
.fold_across_128_bits_consts = {
0xeadc41fd2ba3d420, /* HI64_TERMS: (x^191 mod G) * x^0 */
0x21e9761e252621ac, /* LO64_TERMS: (x^127 mod G) * x^0 */
},
.shuf_table = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
},
.barrett_reduction_consts = {
0x27ecfa329aef9f77, /* HI64_TERMS: floor(x^127 / G) */
0x34d926535897936a, /* LO64_TERMS: (G - x^64 - x^0) / x */
},
};
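More generally, the fold constants in these tables are powers of x reduced modulo G(x). Purely to illustrate the underlying arithmetic (the in-kernel tables are precomputed; this sketch is not part of the sources and only covers the msb-first representation, where bit i of a constant is assumed to be the coefficient of x^i; the lsb-first tables additionally use bit-reflected values):

#include <stdint.h>
#include <stdio.h>

/*
 * Compute x^k mod G(x) over GF(2), where G(x) is a degree-64 polynomial
 * whose x^63..x^0 coefficients are given in g_lo (the x^64 term is
 * implicit).  Bit i of the result is the coefficient of x^i.
 */
static uint64_t xpow_mod_g(unsigned int k, uint64_t g_lo)
{
	uint64_t r = 1;			/* start from x^0 */

	while (k--) {
		int had_x63 = r >> 63;	/* coefficient of x^63 */

		r <<= 1;		/* multiply by x */
		if (had_x63)
			r ^= g_lo;	/* reduce: x^64 == g_lo (mod G) */
	}
	return r;
}

int main(void)
{
	const uint64_t g = 0x42f0e1eba9ea3693;	/* msb-first CRC-64, G - x^64 */

	/* Expected to reproduce the msb-first fold_across_128_bits_consts. */
	printf("x^128 mod G = 0x%016llx\n",
	       (unsigned long long)xpow_mod_g(128, g));
	printf("x^192 mod G = 0x%016llx\n",
	       (unsigned long long)xpow_mod_g(192, g));
	return 0;
}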

View File

@ -0,0 +1,582 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
//
// Template to generate [V]PCLMULQDQ-based CRC functions for x86
//
// Copyright 2025 Google LLC
//
// Author: Eric Biggers <ebiggers@google.com>
#include <linux/linkage.h>
#include <linux/objtool.h>
// Offsets within the generated constants table
.set OFFSETOF_BSWAP_MASK, -5*16 // msb-first CRCs only
.set OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS, -4*16 // must precede next
.set OFFSETOF_FOLD_ACROSS_1024_BITS_CONSTS, -3*16 // must precede next
.set OFFSETOF_FOLD_ACROSS_512_BITS_CONSTS, -2*16 // must precede next
.set OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS, -1*16 // must precede next
.set OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS, 0*16 // must be 0
.set OFFSETOF_SHUF_TABLE, 1*16
.set OFFSETOF_BARRETT_REDUCTION_CONSTS, 4*16
// Emit a VEX (or EVEX) coded instruction if allowed, or emulate it using the
// corresponding non-VEX instruction plus any needed moves. The supported
// instruction formats are:
//
// - Two-arg [src, dst], where the non-VEX format is the same.
// - Three-arg [src1, src2, dst] where the non-VEX format is
// [src1, src2_and_dst]. If src2 != dst, then src1 must != dst too.
//
// \insn gives the instruction without a "v" prefix and including any immediate
// argument if needed to make the instruction follow one of the above formats.
// If \unaligned_mem_tmp is given, then the emitted non-VEX code moves \arg1 to
// it first; this is needed when \arg1 is an unaligned mem operand.
.macro _cond_vex insn:req, arg1:req, arg2:req, arg3, unaligned_mem_tmp
.if AVX_LEVEL == 0
// VEX not allowed. Emulate it.
.ifnb \arg3 // Three-arg [src1, src2, dst]
.ifc "\arg2", "\arg3" // src2 == dst?
.ifnb \unaligned_mem_tmp
movdqu \arg1, \unaligned_mem_tmp
\insn \unaligned_mem_tmp, \arg3
.else
\insn \arg1, \arg3
.endif
.else // src2 != dst
.ifc "\arg1", "\arg3"
.error "Can't have src1 == dst when src2 != dst"
.endif
.ifnb \unaligned_mem_tmp
movdqu \arg1, \unaligned_mem_tmp
movdqa \arg2, \arg3
\insn \unaligned_mem_tmp, \arg3
.else
movdqa \arg2, \arg3
\insn \arg1, \arg3
.endif
.endif
.else // Two-arg [src, dst]
.ifnb \unaligned_mem_tmp
movdqu \arg1, \unaligned_mem_tmp
\insn \unaligned_mem_tmp, \arg2
.else
\insn \arg1, \arg2
.endif
.endif
.else
// VEX is allowed. Emit the desired instruction directly.
.ifnb \arg3
v\insn \arg1, \arg2, \arg3
.else
v\insn \arg1, \arg2
.endif
.endif
.endm
// Broadcast an aligned 128-bit mem operand to all 128-bit lanes of a vector
// register of length VL.
.macro _vbroadcast src, dst
.if VL == 16
_cond_vex movdqa, \src, \dst
.elseif VL == 32
vbroadcasti128 \src, \dst
.else
vbroadcasti32x4 \src, \dst
.endif
.endm
// Load \vl bytes from the unaligned mem operand \src into \dst, and if the CRC
// is msb-first, use \bswap_mask to reflect the bytes within each 128-bit lane.
.macro _load_data vl, src, bswap_mask, dst
.if \vl < 64
_cond_vex movdqu, "\src", \dst
.else
vmovdqu8 \src, \dst
.endif
.if !LSB_CRC
_cond_vex pshufb, \bswap_mask, \dst, \dst
.endif
.endm
.macro _prepare_v0 vl, v0, v1, bswap_mask
.if LSB_CRC
.if \vl < 64
_cond_vex pxor, (BUF), \v0, \v0, unaligned_mem_tmp=\v1
.else
vpxorq (BUF), \v0, \v0
.endif
.else
_load_data \vl, (BUF), \bswap_mask, \v1
.if \vl < 64
_cond_vex pxor, \v1, \v0, \v0
.else
vpxorq \v1, \v0, \v0
.endif
.endif
.endm
// The x^0..x^63 terms, i.e. poly128 mod x^64, i.e. the physically low qword for
// msb-first order or the physically high qword for lsb-first order
#define LO64_TERMS 0
// The x^64..x^127 terms, i.e. floor(poly128 / x^64), i.e. the physically high
// qword for msb-first order or the physically low qword for lsb-first order
#define HI64_TERMS 1
// Multiply the given \src1_terms of each 128-bit lane of \src1 by the given
// \src2_terms of each 128-bit lane of \src2, and write the result(s) to \dst.
.macro _pclmulqdq src1, src1_terms, src2, src2_terms, dst
_cond_vex "pclmulqdq $((\src1_terms ^ LSB_CRC) << 4) ^ (\src2_terms ^ LSB_CRC),", \
\src1, \src2, \dst
.endm
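The immediate byte built above selects which physical qword of each source operand feeds the carry-less multiply; XORing each terms selector with LSB_CRC accounts for lsb-first order keeping the logical LO64/HI64 terms in the opposite physical qwords. A small C restatement of the same computation, for illustration only:

/*
 * src1_terms and src2_terms are LO64_TERMS (0) or HI64_TERMS (1) as
 * defined above; lsb_crc is 0 or 1.
 */
static unsigned int pclmulqdq_imm(unsigned int src1_terms,
				  unsigned int src2_terms,
				  unsigned int lsb_crc)
{
	/* Bit 4 holds the selector for src1, bit 0 the selector for src2. */
	return ((src1_terms ^ lsb_crc) << 4) ^ (src2_terms ^ lsb_crc);
}

For example, pclmulqdq_imm(HI64_TERMS, HI64_TERMS, 0) is 0x11 while pclmulqdq_imm(HI64_TERMS, HI64_TERMS, 1) is 0x00, since lsb-first order stores the x^64..x^127 terms in the physically low qword.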
// Fold \acc into \data and store the result back into \acc. \data can be an
// unaligned mem operand if using VEX is allowed and the CRC is lsb-first so no
// byte-reflection is needed; otherwise it must be a vector register. \consts
// is a vector register containing the needed fold constants, and \tmp is a
// temporary vector register. All arguments must be the same length.
.macro _fold_vec acc, data, consts, tmp
_pclmulqdq \consts, HI64_TERMS, \acc, HI64_TERMS, \tmp
_pclmulqdq \consts, LO64_TERMS, \acc, LO64_TERMS, \acc
.if AVX_LEVEL <= 2
_cond_vex pxor, \data, \tmp, \tmp
_cond_vex pxor, \tmp, \acc, \acc
.else
vpternlogq $0x96, \data, \tmp, \acc
.endif
.endm
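In polynomial terms, the lane update performed by _fold_vec is acc = consts_hi * acc_hi + consts_lo * acc_lo + data, where * is carry-less multiplication and + is XOR. A rough user-space C model of one 128-bit lane of this step, ignoring the extra implicit factor of x in lsb-first order and open-coding the 64x64-bit carry-less multiply that pclmulqdq performs in hardware (illustration only, not kernel code):

#include <stdint.h>

struct poly128 {
	uint64_t lo;	/* coefficients of x^0..x^63 */
	uint64_t hi;	/* coefficients of x^64..x^127 */
};

/* 64 x 64 -> 128 bit carry-less multiplication. */
static struct poly128 clmul64(uint64_t a, uint64_t b)
{
	struct poly128 r = { 0, 0 };

	for (int i = 0; i < 64; i++) {
		if ((b >> i) & 1) {
			r.lo ^= a << i;
			if (i != 0)
				r.hi ^= a >> (64 - i);
		}
	}
	return r;
}

/* One fold step: acc' = consts.hi * acc.hi + consts.lo * acc.lo + data */
static struct poly128 fold_128(struct poly128 acc, struct poly128 data,
			       struct poly128 consts)
{
	struct poly128 t1 = clmul64(consts.hi, acc.hi);
	struct poly128 t2 = clmul64(consts.lo, acc.lo);

	acc.lo = t1.lo ^ t2.lo ^ data.lo;
	acc.hi = t1.hi ^ t2.hi ^ data.hi;
	return acc;
}

With consts set to {x^D mod G, x^(D+64) mod G}, as supplied by the fold_across_*_bits_consts tables, this lets the accumulator absorb data that sits D bits further along in the message.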
// Fold \acc into \data and store the result back into \acc. \data is an
// unaligned mem operand, \consts is a vector register containing the needed
// fold constants, \bswap_mask is a vector register containing the
// byte-reflection table if the CRC is msb-first, and \tmp1 and \tmp2 are
// temporary vector registers. All arguments must have length \vl.
.macro _fold_vec_mem vl, acc, data, consts, bswap_mask, tmp1, tmp2
.if AVX_LEVEL == 0 || !LSB_CRC
_load_data \vl, \data, \bswap_mask, \tmp1
_fold_vec \acc, \tmp1, \consts, \tmp2
.else
_fold_vec \acc, \data, \consts, \tmp1
.endif
.endm
// Load the constants for folding across 2**i vectors of length VL at a time
// into all 128-bit lanes of the vector register CONSTS.
.macro _load_vec_folding_consts i
_vbroadcast OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS+(4-LOG2_VL-\i)*16(CONSTS_PTR), \
CONSTS
.endm
// Given vector registers \v0 and \v1 of length \vl, fold \v0 into \v1 and store
// the result back into \v0. If the remaining length mod \vl is nonzero, also
// fold \vl data bytes from BUF. For both operations the fold distance is \vl.
// \consts must be a register of length \vl containing the fold constants.
.macro _fold_vec_final vl, v0, v1, consts, bswap_mask, tmp1, tmp2
_fold_vec \v0, \v1, \consts, \tmp1
test $\vl, LEN8
jz .Lfold_vec_final_done\@
_fold_vec_mem \vl, \v0, (BUF), \consts, \bswap_mask, \tmp1, \tmp2
add $\vl, BUF
.Lfold_vec_final_done\@:
.endm
// This macro generates the body of a CRC function with the following prototype:
//
// crc_t crc_func(crc_t crc, const u8 *buf, size_t len, const void *consts);
//
// |crc| is the initial CRC, and crc_t is a data type wide enough to hold it.
// |buf| is the data to checksum. |len| is the data length in bytes, which must
// be at least 16. |consts| is a pointer to the fold_across_128_bits_consts
// field of the constants struct that was generated for the chosen CRC variant.
//
// Moving on to the macro parameters, \n is the number of bits in the CRC, e.g.
// 32 for a CRC-32. Currently the supported values are 8, 16, 32, and 64. If
// the file is compiled in i386 mode, then the maximum supported value is 32.
//
// \lsb_crc is 1 if the CRC processes the least significant bit of each byte
// first, i.e. maps bit0 to x^7, bit1 to x^6, ..., bit7 to x^0. \lsb_crc is 0
// if the CRC processes the most significant bit of each byte first, i.e. maps
// bit0 to x^0, bit1 to x^1, ..., bit7 to x^7.
//
// \vl is the maximum length of vector register to use in bytes: 16, 32, or 64.
//
// \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or
// 512 for AVX512.
//
// If \vl == 16 && \avx_level == 0, the generated code requires:
// PCLMULQDQ && SSE4.1. (Note: all known CPUs with PCLMULQDQ also have SSE4.1.)
//
// If \vl == 32 && \avx_level == 2, the generated code requires:
// VPCLMULQDQ && AVX2.
//
// If \vl == 64 && \avx_level == 512, the generated code requires:
// VPCLMULQDQ && AVX512BW && AVX512VL.
//
// Other \vl and \avx_level combinations are either not supported or not useful.
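// As a concrete example of the C-side view: the crc32_lsb instantiation
// (\n=32, \lsb_crc=1) is declared via DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb,
// u32) as
//
//	u32 crc32_lsb_pclmul_sse(u32 crc, const u8 *p, size_t len,
//				 const void *consts_ptr);
//
// and gets called with consts_ptr pointing at
// crc32_lsb_0xedb88320_consts.fold_across_128_bits_consts.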
.macro _crc_pclmul n, lsb_crc, vl, avx_level
.set LSB_CRC, \lsb_crc
.set VL, \vl
.set AVX_LEVEL, \avx_level
// Define aliases for the xmm, ymm, or zmm registers according to VL.
.irp i, 0,1,2,3,4,5,6,7
.if VL == 16
.set V\i, %xmm\i
.set LOG2_VL, 4
.elseif VL == 32
.set V\i, %ymm\i
.set LOG2_VL, 5
.elseif VL == 64
.set V\i, %zmm\i
.set LOG2_VL, 6
.else
.error "Unsupported vector length"
.endif
.endr
// Define aliases for the function parameters.
// Note: when crc_t is shorter than u32, zero-extension to 32 bits is
// guaranteed by the ABI. Zero-extension to 64 bits is *not* guaranteed
// when crc_t is shorter than u64.
#ifdef __x86_64__
.if \n <= 32
.set CRC, %edi
.else
.set CRC, %rdi
.endif
.set BUF, %rsi
.set LEN, %rdx
.set LEN32, %edx
.set LEN8, %dl
.set CONSTS_PTR, %rcx
#else
// 32-bit support, assuming -mregparm=3 and not including support for
// CRC-64 (which would use both eax and edx to pass the crc parameter).
.set CRC, %eax
.set BUF, %edx
.set LEN, %ecx
.set LEN32, %ecx
.set LEN8, %cl
.set CONSTS_PTR, %ebx // Passed on stack
#endif
// Define aliases for some local variables. V0-V5 are used without
// aliases (for accumulators, data, temporary values, etc). Staying
// within the first 8 vector registers keeps the code 32-bit SSE
// compatible and reduces the size of 64-bit SSE code slightly.
.set BSWAP_MASK, V6
.set BSWAP_MASK_YMM, %ymm6
.set BSWAP_MASK_XMM, %xmm6
.set CONSTS, V7
.set CONSTS_YMM, %ymm7
.set CONSTS_XMM, %xmm7
// Use ANNOTATE_NOENDBR to suppress an objtool warning, since the
// functions generated by this macro are called only by static_call.
ANNOTATE_NOENDBR
#ifdef __i386__
push CONSTS_PTR
mov 8(%esp), CONSTS_PTR
#endif
// Create a 128-bit vector that contains the initial CRC in the end that
// represents the high-order polynomial coefficients, with the rest zero.
// If the CRC is msb-first, also load the byte-reflection table.
.if \n <= 32
_cond_vex movd, CRC, %xmm0
.else
_cond_vex movq, CRC, %xmm0
.endif
.if !LSB_CRC
_cond_vex pslldq, $(128-\n)/8, %xmm0, %xmm0
_vbroadcast OFFSETOF_BSWAP_MASK(CONSTS_PTR), BSWAP_MASK
.endif
// Load the first vector of data and XOR the initial CRC into the
// appropriate end of the first 128-bit lane of data. If LEN < VL, then
// use a short vector and jump ahead to the final reduction. (LEN >= 16
// is guaranteed here but not necessarily LEN >= VL.)
.if VL >= 32
cmp $VL, LEN
jae .Lat_least_1vec\@
.if VL == 64
cmp $32, LEN32
jb .Lless_than_32bytes\@
_prepare_v0 32, %ymm0, %ymm1, BSWAP_MASK_YMM
add $32, BUF
jmp .Lreduce_256bits_to_128bits\@
.Lless_than_32bytes\@:
.endif
_prepare_v0 16, %xmm0, %xmm1, BSWAP_MASK_XMM
add $16, BUF
vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
jmp .Lcheck_for_partial_block\@
.Lat_least_1vec\@:
.endif
_prepare_v0 VL, V0, V1, BSWAP_MASK
// Handle VL <= LEN < 4*VL.
cmp $4*VL-1, LEN
ja .Lat_least_4vecs\@
add $VL, BUF
// If VL <= LEN < 2*VL, then jump ahead to the reduction from 1 vector.
// If VL==16 then load fold_across_128_bits_consts first, as the final
// reduction depends on it and it won't be loaded anywhere else.
cmp $2*VL-1, LEN32
.if VL == 16
_cond_vex movdqa, OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
.endif
jbe .Lreduce_1vec_to_128bits\@
// Otherwise 2*VL <= LEN < 4*VL. Load one more vector and jump ahead to
// the reduction from 2 vectors.
_load_data VL, (BUF), BSWAP_MASK, V1
add $VL, BUF
jmp .Lreduce_2vecs_to_1\@
.Lat_least_4vecs\@:
// Load 3 more vectors of data.
_load_data VL, 1*VL(BUF), BSWAP_MASK, V1
_load_data VL, 2*VL(BUF), BSWAP_MASK, V2
_load_data VL, 3*VL(BUF), BSWAP_MASK, V3
sub $-4*VL, BUF // Shorter than 'add 4*VL' when VL=32
add $-4*VL, LEN // Shorter than 'sub 4*VL' when VL=32
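// (The negative forms fit in a sign-extended imm8, since -128 is
// representable in 8 bits but +128 is not.)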
// Main loop: while LEN >= 4*VL, fold the 4 vectors V0-V3 into the next
// 4 vectors of data and write the result back to V0-V3.
cmp $4*VL-1, LEN // Shorter than 'cmp 4*VL' when VL=32
jbe .Lreduce_4vecs_to_2\@
_load_vec_folding_consts 2
.Lfold_4vecs_loop\@:
_fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
_fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
_fold_vec_mem VL, V2, 2*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
_fold_vec_mem VL, V3, 3*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
sub $-4*VL, BUF
add $-4*VL, LEN
cmp $4*VL-1, LEN
ja .Lfold_4vecs_loop\@
// Fold V0,V1 into V2,V3 and write the result back to V0,V1. Then fold
// two more vectors of data from BUF, if at least that much remains.
.Lreduce_4vecs_to_2\@:
_load_vec_folding_consts 1
_fold_vec V0, V2, CONSTS, V4
_fold_vec V1, V3, CONSTS, V4
test $2*VL, LEN8
jz .Lreduce_2vecs_to_1\@
_fold_vec_mem VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
_fold_vec_mem VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
sub $-2*VL, BUF
// Fold V0 into V1 and write the result back to V0. Then fold one more
// vector of data from BUF, if at least that much remains.
.Lreduce_2vecs_to_1\@:
_load_vec_folding_consts 0
_fold_vec_final VL, V0, V1, CONSTS, BSWAP_MASK, V4, V5
.Lreduce_1vec_to_128bits\@:
.if VL == 64
// Reduce 512-bit %zmm0 to 256-bit %ymm0. Then fold 256 more bits of
// data from BUF, if at least that much remains.
vbroadcasti128 OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS(CONSTS_PTR), CONSTS_YMM
vextracti64x4 $1, %zmm0, %ymm1
_fold_vec_final 32, %ymm0, %ymm1, CONSTS_YMM, BSWAP_MASK_YMM, %ymm4, %ymm5
.Lreduce_256bits_to_128bits\@:
.endif
.if VL >= 32
// Reduce 256-bit %ymm0 to 128-bit %xmm0. Then fold 128 more bits of
// data from BUF, if at least that much remains.
vmovdqa OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
vextracti128 $1, %ymm0, %xmm1
_fold_vec_final 16, %xmm0, %xmm1, CONSTS_XMM, BSWAP_MASK_XMM, %xmm4, %xmm5
.Lcheck_for_partial_block\@:
.endif
and $15, LEN32
jz .Lreduce_128bits_to_crc\@
// 1 <= LEN <= 15 data bytes remain in BUF. The polynomial is now
// A*(x^(8*LEN)) + B, where A is the 128-bit polynomial stored in %xmm0
// and B is the polynomial of the remaining LEN data bytes. To reduce
// this to 128 bits without needing fold constants for each possible
// LEN, rearrange this expression into C1*(x^128) + C2, where
// C1 = floor(A / x^(128 - 8*LEN)) and C2 = (A*x^(8*LEN) + B) mod x^128.
// Then fold C1 into C2, which is just another fold across 128 bits.
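// For example, if LEN == 3 then B has degree <= 23; C1 is the x^104..x^127
// terms of A (divided by x^104), and C2 is the x^0..x^103 terms of A
// multiplied by x^24 plus B, so that C1*(x^128) + C2 == A*(x^24) + B.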
.if !LSB_CRC || AVX_LEVEL == 0
// Load the last 16 data bytes. Note that originally LEN was >= 16.
_load_data 16, "-16(BUF,LEN)", BSWAP_MASK_XMM, %xmm2
.endif // Else will use vpblendvb mem operand later.
.if !LSB_CRC
neg LEN // Needed for indexing shuf_table
.endif
// tmp = A*x^(8*LEN) mod x^128
// lsb: pshufb by [LEN, LEN+1, ..., 15, -1, -1, ..., -1]
// i.e. right-shift by LEN bytes.
// msb: pshufb by [-1, -1, ..., -1, 0, 1, ..., 15-LEN]
// i.e. left-shift by LEN bytes.
_cond_vex movdqu, "OFFSETOF_SHUF_TABLE+16(CONSTS_PTR,LEN)", %xmm3
_cond_vex pshufb, %xmm3, %xmm0, %xmm1
// C1 = floor(A / x^(128 - 8*LEN))
// lsb: pshufb by [-1, -1, ..., -1, 0, 1, ..., LEN-1]
// i.e. left-shift by 16-LEN bytes.
// msb: pshufb by [16-LEN, 16-LEN+1, ..., 15, -1, -1, ..., -1]
// i.e. right-shift by 16-LEN bytes.
_cond_vex pshufb, "OFFSETOF_SHUF_TABLE+32*!LSB_CRC(CONSTS_PTR,LEN)", \
%xmm0, %xmm0, unaligned_mem_tmp=%xmm4
// C2 = tmp + B. This is just a blend of tmp with the last 16 data
// bytes (reflected if msb-first). The blend mask is the shuffle table
// that was used to create tmp: 0 selects the byte from tmp, and 1 the
// corresponding byte of the last 16 data bytes.
.if AVX_LEVEL == 0
movdqa %xmm0, %xmm4
movdqa %xmm3, %xmm0
pblendvb %xmm2, %xmm1 // uses %xmm0 as implicit operand
movdqa %xmm4, %xmm0
.elseif LSB_CRC
vpblendvb %xmm3, -16(BUF,LEN), %xmm1, %xmm1
.else
vpblendvb %xmm3, %xmm2, %xmm1, %xmm1
.endif
// Fold C1 into C2 and store the 128-bit result in %xmm0.
_fold_vec %xmm0, %xmm1, CONSTS_XMM, %xmm4
.Lreduce_128bits_to_crc\@:
// Compute the CRC as %xmm0 * x^n mod G. Here %xmm0 means the 128-bit
// polynomial stored in %xmm0 (using either lsb-first or msb-first bit
// order according to LSB_CRC), and G is the CRC's generator polynomial.
// First, multiply %xmm0 by x^n and reduce the result to 64+n bits:
//
// t0 := (x^(64+n) mod G) * floor(%xmm0 / x^64) +
// x^n * (%xmm0 mod x^64)
//
// Store t0 * x^(64-n) in %xmm0. I.e., actually do:
//
// %xmm0 := ((x^(64+n) mod G) * x^(64-n)) * floor(%xmm0 / x^64) +
// x^64 * (%xmm0 mod x^64)
//
// The extra unreduced factor of x^(64-n) makes floor(t0 / x^n) aligned
// to the HI64_TERMS of %xmm0 so that the next pclmulqdq can easily
// select it. The 64-bit constant (x^(64+n) mod G) * x^(64-n) in the
// msb-first case, or (x^(63+n) mod G) * x^(64-n) in the lsb-first case
// (considering the extra factor of x that gets implicitly introduced by
// each pclmulqdq when using lsb-first order), is identical to the
// constant that was used earlier for folding the LO64_TERMS across 128
// bits. Thus it's already available in LO64_TERMS of CONSTS_XMM.
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm0, HI64_TERMS, %xmm1
.if LSB_CRC
_cond_vex psrldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64)
.else
_cond_vex pslldq, $8, %xmm0, %xmm0 // x^64 * (%xmm0 mod x^64)
.endif
_cond_vex pxor, %xmm1, %xmm0, %xmm0
// The HI64_TERMS of %xmm0 now contain floor(t0 / x^n).
// The LO64_TERMS of %xmm0 now contain (t0 mod x^n) * x^(64-n).
// First step of Barrett reduction: Compute floor(t0 / G). This is the
// polynomial by which G needs to be multiplied to cancel out the x^n
// and higher terms of t0, i.e. to reduce t0 mod G. First do:
//
// t1 := floor(x^(63+n) / G) * x * floor(t0 / x^n)
//
// Then the desired value floor(t0 / G) is floor(t1 / x^64). The 63 in
// x^(63+n) is the maximum degree of floor(t0 / x^n) and thus the lowest
// value that carries enough precision through the calculation.
//
// The '* x' makes it so the result is floor(t1 / x^64) rather than
// floor(t1 / x^63), making it qword-aligned in HI64_TERMS so that it
// can be extracted much more easily in the next step. In the lsb-first
// case the '* x' happens implicitly. In the msb-first case it must be
// done explicitly; floor(x^(63+n) / G) * x is a 65-bit constant, so the
// constant passed to pclmulqdq is (floor(x^(63+n) / G) * x) - x^64, and
// the multiplication by the x^64 term is handled using a pxor. The
// pxor causes the low 64 terms of t1 to be wrong, but they are unused.
_cond_vex movdqa, OFFSETOF_BARRETT_REDUCTION_CONSTS(CONSTS_PTR), CONSTS_XMM
_pclmulqdq CONSTS_XMM, HI64_TERMS, %xmm0, HI64_TERMS, %xmm1
.if !LSB_CRC
_cond_vex pxor, %xmm0, %xmm1, %xmm1 // += x^64 * floor(t0 / x^n)
.endif
// The HI64_TERMS of %xmm1 now contain floor(t1 / x^64) = floor(t0 / G).
// Second step of Barrett reduction: Cancel out the x^n and higher terms
// of t0 by subtracting the needed multiple of G. This gives the CRC:
//
// crc := t0 - (G * floor(t0 / G))
//
// But %xmm0 contains t0 * x^(64-n), so it's more convenient to do:
//
// crc := ((t0 * x^(64-n)) - ((G * x^(64-n)) * floor(t0 / G))) / x^(64-n)
//
// Furthermore, since the resulting CRC is n-bit, if mod x^n is
// explicitly applied to it then the x^n term of G makes no difference
// in the result and can be omitted. This helps keep the constant
// multiplier in 64 bits in most cases. This gives the following:
//
// %xmm0 := %xmm0 - (((G - x^n) * x^(64-n)) * floor(t0 / G))
// crc := (%xmm0 / x^(64-n)) mod x^n
//
// In the lsb-first case, each pclmulqdq implicitly introduces
// an extra factor of x, so in that case the constant that needs to be
// passed to pclmulqdq is actually '(G - x^n) * x^(63-n)' when n <= 63.
// For lsb-first CRCs where n=64, the extra factor of x cannot be as
// easily avoided. In that case, instead pass '(G - x^n - x^0) / x' to
// pclmulqdq and handle the x^0 term (i.e. 1) separately. (All CRC
// polynomials have nonzero x^n and x^0 terms.) It works out as: the
// CRC has to be XORed with the physically low qword of %xmm1, representing
// floor(t0 / G). The most efficient way to do that is to move it to
// the physically high qword and use a ternlog to combine the two XORs.
.if LSB_CRC && \n == 64
_cond_vex punpcklqdq, %xmm1, %xmm2, %xmm2
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
.if AVX_LEVEL <= 2
_cond_vex pxor, %xmm2, %xmm0, %xmm0
_cond_vex pxor, %xmm1, %xmm0, %xmm0
.else
vpternlogq $0x96, %xmm2, %xmm1, %xmm0
.endif
_cond_vex "pextrq $1,", %xmm0, %rax // (%xmm0 / x^0) mod x^64
.else
_pclmulqdq CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
_cond_vex pxor, %xmm1, %xmm0, %xmm0
.if \n == 8
_cond_vex "pextrb $7 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^56) mod x^8
.elseif \n == 16
_cond_vex "pextrw $3 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^48) mod x^16
.elseif \n == 32
_cond_vex "pextrd $1 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^32) mod x^32
.else // \n == 64 && !LSB_CRC
_cond_vex movq, %xmm0, %rax // (%xmm0 / x^0) mod x^64
.endif
.endif
.if VL > 16
vzeroupper // Needed when ymm or zmm registers may have been used.
.endif
#ifdef __i386__
pop CONSTS_PTR
#endif
RET
.endm
#ifdef CONFIG_AS_VPCLMULQDQ
#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
SYM_FUNC_START(prefix##_pclmul_sse); \
_crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \
SYM_FUNC_END(prefix##_pclmul_sse); \
\
SYM_FUNC_START(prefix##_vpclmul_avx2); \
_crc_pclmul n=bits, lsb_crc=lsb, vl=32, avx_level=2; \
SYM_FUNC_END(prefix##_vpclmul_avx2); \
\
SYM_FUNC_START(prefix##_vpclmul_avx512); \
_crc_pclmul n=bits, lsb_crc=lsb, vl=64, avx_level=512; \
SYM_FUNC_END(prefix##_vpclmul_avx512);
#else
#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb) \
SYM_FUNC_START(prefix##_pclmul_sse); \
_crc_pclmul n=bits, lsb_crc=lsb, vl=16, avx_level=0; \
SYM_FUNC_END(prefix##_pclmul_sse);
#endif // !CONFIG_AS_VPCLMULQDQ

View File

@ -0,0 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
* instantiated by crc-pclmul-template.S
*
* Copyright 2025 Google LLC
*
* Author: Eric Biggers <ebiggers@google.com>
*/
#ifndef _CRC_PCLMUL_TEMPLATE_H
#define _CRC_PCLMUL_TEMPLATE_H
#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <linux/static_call.h>
#include "crc-pclmul-consts.h"
#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t) \
crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len, \
const void *consts_ptr); \
crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len, \
const void *consts_ptr); \
crc_t prefix##_vpclmul_avx512(crc_t crc, const u8 *p, size_t len, \
const void *consts_ptr); \
DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
#define INIT_CRC_PCLMUL(prefix) \
do { \
if (IS_ENABLED(CONFIG_AS_VPCLMULQDQ) && \
boot_cpu_has(X86_FEATURE_VPCLMULQDQ) && \
boot_cpu_has(X86_FEATURE_AVX2) && \
cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) { \
if (boot_cpu_has(X86_FEATURE_AVX512BW) && \
boot_cpu_has(X86_FEATURE_AVX512VL) && \
!boot_cpu_has(X86_FEATURE_PREFER_YMM) && \
cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) { \
static_call_update(prefix##_pclmul, \
prefix##_vpclmul_avx512); \
} else { \
static_call_update(prefix##_pclmul, \
prefix##_vpclmul_avx2); \
} \
} \
} while (0)
/*
* Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
* bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
*
* 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
* There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
* varying by CPU and factors such as which parts of the "FPU" state userspace
* has touched, which could result in a larger cutoff being better. Indeed, a
* larger cutoff is usually better for a *single* message. However, the
* overhead of the FPU section gets amortized if multiple FPU sections get
* executed before returning to userspace, since the XSAVE and XRSTOR occur only
* once. Considering that and the fact that the [V]PCLMULQDQ code is lighter on
* the dcache than the table-based code is, a 16-byte cutoff seems to work well.
*/
#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \
do { \
if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \
crypto_simd_usable()) { \
const void *consts_ptr; \
\
consts_ptr = (consts).fold_across_128_bits_consts; \
kernel_fpu_begin(); \
crc = static_call(prefix##_pclmul)((crc), (p), (len), \
consts_ptr); \
kernel_fpu_end(); \
return crc; \
} \
} while (0)
#endif /* _CRC_PCLMUL_TEMPLATE_H */

View File

@ -1,37 +1,32 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* CRC-T10DIF using PCLMULQDQ instructions
* CRC-T10DIF using [V]PCLMULQDQ instructions
*
* Copyright 2024 Google LLC
*/
#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <linux/crc-t10dif.h>
#include <linux/module.h>
#include "crc-pclmul-template.h"
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16);
u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
{
if (len >= 16 &&
static_key_enabled(&have_pclmulqdq) && crypto_simd_usable()) {
kernel_fpu_begin();
crc = crc_t10dif_pcl(crc, p, len);
kernel_fpu_end();
return crc;
}
CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts,
have_pclmulqdq);
return crc_t10dif_generic(crc, p, len);
}
EXPORT_SYMBOL(crc_t10dif_arch);
static int __init crc_t10dif_x86_init(void)
{
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
static_branch_enable(&have_pclmulqdq);
INIT_CRC_PCLMUL(crc16_msb);
}
return 0;
}
arch_initcall(crc_t10dif_x86_init);
@ -41,11 +36,5 @@ static void __exit crc_t10dif_x86_exit(void)
}
module_exit(crc_t10dif_x86_exit);
bool crc_t10dif_is_optimized(void)
{
return static_key_enabled(&have_pclmulqdq);
}
EXPORT_SYMBOL(crc_t10dif_is_optimized);
MODULE_DESCRIPTION("CRC-T10DIF using PCLMULQDQ instructions");
MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions");
MODULE_LICENSE("GPL");

View File

@ -0,0 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
// Copyright 2025 Google LLC
#include "crc-pclmul-template.S"
DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, /* bits= */ 16, /* lsb= */ 0)

View File

@ -7,43 +7,20 @@
* Copyright 2024 Google LLC
*/
#include <asm/cpufeatures.h>
#include <asm/simd.h>
#include <crypto/internal/simd.h>
#include <linux/crc32.h>
#include <linux/linkage.h>
#include <linux/module.h>
/* minimum size of buffer for crc32_pclmul_le_16 */
#define CRC32_PCLMUL_MIN_LEN 64
#include "crc-pclmul-template.h"
static DEFINE_STATIC_KEY_FALSE(have_crc32);
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
size_t n = -(uintptr_t)p & 15;
/* align p to 16-byte boundary */
if (n) {
crc = crc32_le_base(crc, p, n);
p += n;
len -= n;
}
n = round_down(len, 16);
kernel_fpu_begin();
crc = crc32_pclmul_le_16(crc, p, n);
kernel_fpu_end();
p += n;
len -= n;
}
if (len)
crc = crc32_le_base(crc, p, len);
return crc;
CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
have_pclmulqdq);
return crc32_le_base(crc, p, len);
}
EXPORT_SYMBOL(crc32_le_arch);
@ -61,12 +38,12 @@ EXPORT_SYMBOL(crc32_le_arch);
asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
{
size_t num_longs;
if (!static_branch_likely(&have_crc32))
return crc32c_le_base(crc, p, len);
return crc32c_base(crc, p, len);
if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
@ -78,14 +55,22 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
for (num_longs = len / sizeof(unsigned long);
num_longs != 0; num_longs--, p += sizeof(unsigned long))
asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
for (len %= sizeof(unsigned long); len; len--, p++)
asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
if (sizeof(unsigned long) > 4 && (len & 4)) {
asm("crc32l %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u32 *)p));
p += 4;
}
if (len & 2) {
asm("crc32w %1, %0" : "+r" (crc) : ASM_INPUT_RM (*(u16 *)p));
p += 2;
}
if (len & 1)
asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
return crc;
}
EXPORT_SYMBOL(crc32c_le_arch);
EXPORT_SYMBOL(crc32c_arch);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
@ -97,8 +82,10 @@ static int __init crc32_x86_init(void)
{
if (boot_cpu_has(X86_FEATURE_XMM4_2))
static_branch_enable(&have_crc32);
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
static_branch_enable(&have_pclmulqdq);
INIT_CRC_PCLMUL(crc32_lsb);
}
return 0;
}
arch_initcall(crc32_x86_init);

View File

@ -1,217 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2012 Xyratex Technology Limited
*
* Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
* calculation.
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
* at:
* http://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2B: Instruction Set Reference, N-Z
*
* Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
* Alexander Boyko <Alexander_Boyko@xyratex.com>
*/
/* SPDX-License-Identifier: GPL-2.0-or-later */
// Copyright 2025 Google LLC
#include <linux/linkage.h>
#include "crc-pclmul-template.S"
.section .rodata
.align 16
/*
* [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
* #define CONSTANT_R1 0x154442bd4LL
*
* [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
* #define CONSTANT_R2 0x1c6e41596LL
*/
.Lconstant_R2R1:
.octa 0x00000001c6e415960000000154442bd4
/*
* [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
* #define CONSTANT_R3 0x1751997d0LL
*
* [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
* #define CONSTANT_R4 0x0ccaa009eLL
*/
.Lconstant_R4R3:
.octa 0x00000000ccaa009e00000001751997d0
/*
* [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
* #define CONSTANT_R5 0x163cd6124LL
*/
.Lconstant_R5:
.octa 0x00000000000000000000000163cd6124
.Lconstant_mask32:
.octa 0x000000000000000000000000FFFFFFFF
/*
* #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
*
* Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
* #define CONSTANT_RU 0x1F7011641LL
*/
.Lconstant_RUpoly:
.octa 0x00000001F701164100000001DB710641
#define CONSTANT %xmm0
#ifdef __x86_64__
#define CRC %edi
#define BUF %rsi
#define LEN %rdx
#else
#define CRC %eax
#define BUF %edx
#define LEN %ecx
#endif
.text
/**
* Calculate crc32
* CRC - initial crc32
* BUF - buffer (16 bytes aligned)
* LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
* return %eax crc32
* u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
*/
SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
movdqa 0x20(BUF), %xmm3
movdqa 0x30(BUF), %xmm4
movd CRC, CONSTANT
pxor CONSTANT, %xmm1
sub $0x40, LEN
add $0x40, BUF
cmp $0x40, LEN
jb .Lless_64
#ifdef __x86_64__
movdqa .Lconstant_R2R1(%rip), CONSTANT
#else
movdqa .Lconstant_R2R1, CONSTANT
#endif
.Lloop_64:/* 64 bytes Full cache line folding */
prefetchnta 0x40(BUF)
movdqa %xmm1, %xmm5
movdqa %xmm2, %xmm6
movdqa %xmm3, %xmm7
#ifdef __x86_64__
movdqa %xmm4, %xmm8
#endif
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x00, CONSTANT, %xmm2
pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__
pclmulqdq $0x00, CONSTANT, %xmm4
#endif
pclmulqdq $0x11, CONSTANT, %xmm5
pclmulqdq $0x11, CONSTANT, %xmm6
pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__
pclmulqdq $0x11, CONSTANT, %xmm8
#endif
pxor %xmm5, %xmm1
pxor %xmm6, %xmm2
pxor %xmm7, %xmm3
#ifdef __x86_64__
pxor %xmm8, %xmm4
#else
/* xmm8 unsupported for x32 */
movdqa %xmm4, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm4
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm4
#endif
pxor (BUF), %xmm1
pxor 0x10(BUF), %xmm2
pxor 0x20(BUF), %xmm3
pxor 0x30(BUF), %xmm4
sub $0x40, LEN
add $0x40, BUF
cmp $0x40, LEN
jge .Lloop_64
.Lless_64:/* Folding cache line into 128bit */
#ifdef __x86_64__
movdqa .Lconstant_R4R3(%rip), CONSTANT
#else
movdqa .Lconstant_R4R3, CONSTANT
#endif
prefetchnta (BUF)
movdqa %xmm1, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm2, %xmm1
movdqa %xmm1, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm3, %xmm1
movdqa %xmm1, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor %xmm4, %xmm1
cmp $0x10, LEN
jb .Lfold_64
.Lloop_16:/* Folding rest buffer into 128bit */
movdqa %xmm1, %xmm5
pclmulqdq $0x00, CONSTANT, %xmm1
pclmulqdq $0x11, CONSTANT, %xmm5
pxor %xmm5, %xmm1
pxor (BUF), %xmm1
sub $0x10, LEN
add $0x10, BUF
cmp $0x10, LEN
jge .Lloop_16
.Lfold_64:
/* perform the last 64 bit fold, also adds 32 zeroes
* to the input stream */
pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
psrldq $0x08, %xmm1
pxor CONSTANT, %xmm1
/* final 32-bit fold */
movdqa %xmm1, %xmm2
#ifdef __x86_64__
movdqa .Lconstant_R5(%rip), CONSTANT
movdqa .Lconstant_mask32(%rip), %xmm3
#else
movdqa .Lconstant_R5, CONSTANT
movdqa .Lconstant_mask32, %xmm3
#endif
psrldq $0x04, %xmm2
pand %xmm3, %xmm1
pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
movdqa .Lconstant_RUpoly(%rip), CONSTANT
#else
movdqa .Lconstant_RUpoly, CONSTANT
#endif
movdqa %xmm1, %xmm2
pand %xmm3, %xmm1
pclmulqdq $0x10, CONSTANT, %xmm1
pand %xmm3, %xmm1
pclmulqdq $0x00, CONSTANT, %xmm1
pxor %xmm2, %xmm1
pextrd $0x01, %xmm1, %eax
RET
SYM_FUNC_END(crc32_pclmul_le_16)
DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1)

arch/x86/lib/crc64-glue.c Normal file
View File

@ -0,0 +1,50 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* CRC64 using [V]PCLMULQDQ instructions
*
* Copyright 2025 Google LLC
*/
#include <linux/crc64.h>
#include <linux/module.h>
#include "crc-pclmul-template.h"
static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64);
DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64);
u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
{
CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts,
have_pclmulqdq);
return crc64_be_generic(crc, p, len);
}
EXPORT_SYMBOL_GPL(crc64_be_arch);
u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
{
CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts,
have_pclmulqdq);
return crc64_nvme_generic(crc, p, len);
}
EXPORT_SYMBOL_GPL(crc64_nvme_arch);
static int __init crc64_x86_init(void)
{
if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
static_branch_enable(&have_pclmulqdq);
INIT_CRC_PCLMUL(crc64_msb);
INIT_CRC_PCLMUL(crc64_lsb);
}
return 0;
}
arch_initcall(crc64_x86_init);
static void __exit crc64_x86_exit(void)
{
}
module_exit(crc64_x86_exit);
MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions");
MODULE_LICENSE("GPL");

View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
// Copyright 2025 Google LLC
#include "crc-pclmul-template.S"
DEFINE_CRC_PCLMUL_FUNCS(crc64_msb, /* bits= */ 64, /* lsb= */ 0)
DEFINE_CRC_PCLMUL_FUNCS(crc64_lsb, /* bits= */ 64, /* lsb= */ 1)

View File

@ -1,332 +0,0 @@
########################################################################
# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
#
# Copyright (c) 2013, Intel Corporation
#
# Authors:
# Erdinc Ozturk <erdinc.ozturk@intel.com>
# Vinodh Gopal <vinodh.gopal@intel.com>
# James Guilford <james.guilford@intel.com>
# Tim Chen <tim.c.chen@linux.intel.com>
#
# This software is available to you under a choice of one of two
# licenses. You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the
# distribution.
#
# * Neither the name of the Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Reference paper titled "Fast CRC Computation for Generic
# Polynomials Using PCLMULQDQ Instruction"
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
#
#include <linux/linkage.h>
.text
#define init_crc %edi
#define buf %rsi
#define len %rdx
#define FOLD_CONSTS %xmm10
#define BSWAP_MASK %xmm11
# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
# reg1, reg2.
.macro fold_32_bytes offset, reg1, reg2
movdqu \offset(buf), %xmm9
movdqu \offset+16(buf), %xmm12
pshufb BSWAP_MASK, %xmm9
pshufb BSWAP_MASK, %xmm12
movdqa \reg1, %xmm8
movdqa \reg2, %xmm13
pclmulqdq $0x00, FOLD_CONSTS, \reg1
pclmulqdq $0x11, FOLD_CONSTS, %xmm8
pclmulqdq $0x00, FOLD_CONSTS, \reg2
pclmulqdq $0x11, FOLD_CONSTS, %xmm13
pxor %xmm9 , \reg1
xorps %xmm8 , \reg1
pxor %xmm12, \reg2
xorps %xmm13, \reg2
.endm
# Fold src_reg into dst_reg.
.macro fold_16_bytes src_reg, dst_reg
movdqa \src_reg, %xmm8
pclmulqdq $0x11, FOLD_CONSTS, \src_reg
pclmulqdq $0x00, FOLD_CONSTS, %xmm8
pxor %xmm8, \dst_reg
xorps \src_reg, \dst_reg
.endm
#
# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
#
# Assumes len >= 16.
#
SYM_FUNC_START(crc_t10dif_pcl)
movdqa .Lbswap_mask(%rip), BSWAP_MASK
# For sizes less than 256 bytes, we can't fold 128 bytes at a time.
cmp $256, len
jl .Lless_than_256_bytes
# Load the first 128 data bytes. Byte swapping is necessary to make the
# bit order match the polynomial coefficient order.
movdqu 16*0(buf), %xmm0
movdqu 16*1(buf), %xmm1
movdqu 16*2(buf), %xmm2
movdqu 16*3(buf), %xmm3
movdqu 16*4(buf), %xmm4
movdqu 16*5(buf), %xmm5
movdqu 16*6(buf), %xmm6
movdqu 16*7(buf), %xmm7
add $128, buf
pshufb BSWAP_MASK, %xmm0
pshufb BSWAP_MASK, %xmm1
pshufb BSWAP_MASK, %xmm2
pshufb BSWAP_MASK, %xmm3
pshufb BSWAP_MASK, %xmm4
pshufb BSWAP_MASK, %xmm5
pshufb BSWAP_MASK, %xmm6
pshufb BSWAP_MASK, %xmm7
# XOR the first 16 data *bits* with the initial CRC value.
pxor %xmm8, %xmm8
pinsrw $7, init_crc, %xmm8
pxor %xmm8, %xmm0
movdqa .Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS
# Subtract 128 for the 128 data bytes just consumed. Subtract another
# 128 to simplify the termination condition of the following loop.
sub $256, len
# While >= 128 data bytes remain (not counting xmm0-7), fold the 128
# bytes xmm0-7 into them, storing the result back into xmm0-7.
.Lfold_128_bytes_loop:
fold_32_bytes 0, %xmm0, %xmm1
fold_32_bytes 32, %xmm2, %xmm3
fold_32_bytes 64, %xmm4, %xmm5
fold_32_bytes 96, %xmm6, %xmm7
add $128, buf
sub $128, len
jge .Lfold_128_bytes_loop
# Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
# Fold across 64 bytes.
movdqa .Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
fold_16_bytes %xmm0, %xmm4
fold_16_bytes %xmm1, %xmm5
fold_16_bytes %xmm2, %xmm6
fold_16_bytes %xmm3, %xmm7
# Fold across 32 bytes.
movdqa .Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
fold_16_bytes %xmm4, %xmm6
fold_16_bytes %xmm5, %xmm7
# Fold across 16 bytes.
movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
fold_16_bytes %xmm6, %xmm7
# Add 128 to get the correct number of data bytes remaining in 0...127
# (not counting xmm7), following the previous extra subtraction by 128.
# Then subtract 16 to simplify the termination condition of the
# following loop.
add $128-16, len
# While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
# xmm7 into them, storing the result back into xmm7.
jl .Lfold_16_bytes_loop_done
.Lfold_16_bytes_loop:
movdqa %xmm7, %xmm8
pclmulqdq $0x11, FOLD_CONSTS, %xmm7
pclmulqdq $0x00, FOLD_CONSTS, %xmm8
pxor %xmm8, %xmm7
movdqu (buf), %xmm0
pshufb BSWAP_MASK, %xmm0
pxor %xmm0 , %xmm7
add $16, buf
sub $16, len
jge .Lfold_16_bytes_loop
.Lfold_16_bytes_loop_done:
# Add 16 to get the correct number of data bytes remaining in 0...15
# (not counting xmm7), following the previous extra subtraction by 16.
add $16, len
je .Lreduce_final_16_bytes
.Lhandle_partial_segment:
# Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
# bytes are in xmm7 and the rest are the remaining data in 'buf'. To do
# this without needing a fold constant for each possible 'len', redivide
# the bytes into a first chunk of 'len' bytes and a second chunk of 16
# bytes, then fold the first chunk into the second.
movdqa %xmm7, %xmm2
# xmm1 = last 16 original data bytes
movdqu -16(buf, len), %xmm1
pshufb BSWAP_MASK, %xmm1
# xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
lea .Lbyteshift_table+16(%rip), %rax
sub len, %rax
movdqu (%rax), %xmm0
pshufb %xmm0, %xmm2
# xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
pxor .Lmask1(%rip), %xmm0
pshufb %xmm0, %xmm7
# xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
# then '16-len' bytes from xmm2 (high-order bytes).
pblendvb %xmm2, %xmm1 #xmm0 is implicit
# Fold the first chunk into the second chunk, storing the result in xmm7.
movdqa %xmm7, %xmm8
pclmulqdq $0x11, FOLD_CONSTS, %xmm7
pclmulqdq $0x00, FOLD_CONSTS, %xmm8
pxor %xmm8, %xmm7
pxor %xmm1, %xmm7
.Lreduce_final_16_bytes:
# Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
# Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
movdqa .Lfinal_fold_consts(%rip), FOLD_CONSTS
# Fold the high 64 bits into the low 64 bits, while also multiplying by
# x^64. This produces a 128-bit value congruent to x^64 * M(x) and
# whose low 48 bits are 0.
movdqa %xmm7, %xmm0
pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
pslldq $8, %xmm0
pxor %xmm0, %xmm7 # + low bits * x^64
# Fold the high 32 bits into the low 96 bits. This produces a 96-bit
# value congruent to x^64 * M(x) and whose low 48 bits are 0.
movdqa %xmm7, %xmm0
pand .Lmask2(%rip), %xmm0 # zero high 32 bits
psrldq $12, %xmm7 # extract high 32 bits
pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
pxor %xmm0, %xmm7 # + low bits
# Load G(x) and floor(x^48 / G(x)).
movdqa .Lbarrett_reduction_consts(%rip), FOLD_CONSTS
# Use Barrett reduction to compute the final CRC value.
movdqa %xmm7, %xmm0
pclmulqdq $0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
psrlq $32, %xmm7 # /= x^32
pclmulqdq $0x00, FOLD_CONSTS, %xmm7 # *= G(x)
psrlq $48, %xmm0
pxor %xmm7, %xmm0 # + low 16 nonzero bits
# Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
pextrw $0, %xmm0, %eax
RET
.align 16
.Lless_than_256_bytes:
# Checksumming a buffer of length 16...255 bytes
# Load the first 16 data bytes.
movdqu (buf), %xmm7
pshufb BSWAP_MASK, %xmm7
add $16, buf
# XOR the first 16 data *bits* with the initial CRC value.
pxor %xmm0, %xmm0
pinsrw $7, init_crc, %xmm0
pxor %xmm0, %xmm7
movdqa .Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
cmp $16, len
je .Lreduce_final_16_bytes # len == 16
sub $32, len
jge .Lfold_16_bytes_loop # 32 <= len <= 255
add $16, len
jmp .Lhandle_partial_segment # 17 <= len <= 31
SYM_FUNC_END(crc_t10dif_pcl)
.section .rodata, "a", @progbits
.align 16
# Fold constants precomputed from the polynomial 0x18bb7
# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
.Lfold_across_128_bytes_consts:
.quad 0x0000000000006123 # x^(8*128) mod G(x)
.quad 0x0000000000002295 # x^(8*128+64) mod G(x)
.Lfold_across_64_bytes_consts:
.quad 0x0000000000001069 # x^(4*128) mod G(x)
.quad 0x000000000000dd31 # x^(4*128+64) mod G(x)
.Lfold_across_32_bytes_consts:
.quad 0x000000000000857d # x^(2*128) mod G(x)
.quad 0x0000000000007acc # x^(2*128+64) mod G(x)
.Lfold_across_16_bytes_consts:
.quad 0x000000000000a010 # x^(1*128) mod G(x)
.quad 0x0000000000001faa # x^(1*128+64) mod G(x)
.Lfinal_fold_consts:
.quad 0x1368000000000000 # x^48 * (x^48 mod G(x))
.quad 0x2d56000000000000 # x^48 * (x^80 mod G(x))
.Lbarrett_reduction_consts:
.quad 0x0000000000018bb7 # G(x)
.quad 0x00000001f65a57f8 # floor(x^48 / G(x))
.section .rodata.cst16.mask1, "aM", @progbits, 16
.align 16
.Lmask1:
.octa 0x80808080808080808080808080808080
.section .rodata.cst16.mask2, "aM", @progbits, 16
.align 16
.Lmask2:
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
.section .rodata.cst16.bswap_mask, "aM", @progbits, 16
.align 16
.Lbswap_mask:
.octa 0x000102030405060708090A0B0C0D0E0F
.section .rodata.cst32.byteshift_table, "aM", @progbits, 32
.align 16
# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
# 0x80} XOR the index vector to shift right by '16 - len' bytes.
.Lbyteshift_table:
.byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
.byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe , 0x0

View File

@ -63,7 +63,7 @@ config BLK_DEV_BSGLIB
config BLK_DEV_INTEGRITY
bool "Block layer data integrity support"
select CRC_T10DIF
select CRC64_ROCKSOFT
select CRC64
help
Some storage devices allow extra information to be
stored/retrieved to help protect the data. The block layer

View File

@ -210,7 +210,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
{
return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
return cpu_to_be64(crc64_nvme(crc, data, len));
}
static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,

View File

@ -1081,26 +1081,6 @@ config CRYPTO_CRC32
Used by RoCEv2 and f2fs.
config CRYPTO_CRCT10DIF
tristate "CRCT10DIF"
select CRYPTO_HASH
select CRC_T10DIF
help
CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
CRC algorithm used by the SCSI Block Commands standard.
config CRYPTO_CRC64_ROCKSOFT
tristate "CRC64 based on Rocksoft Model algorithm"
depends on CRC64
select CRYPTO_HASH
help
CRC64 CRC algorithm based on the Rocksoft Model CRC Algorithm
Used by the NVMe implementation of T10 DIF (BLK_DEV_INTEGRITY)
See https://zlib.net/crc_v3.txt
endmenu
menu "Compression"

View File

@ -155,9 +155,6 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
CFLAGS_crc32c_generic.o += -DARCH=$(ARCH)
CFLAGS_crc32_generic.o += -DARCH=$(ARCH)
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_generic.o
CFLAGS_crct10dif_generic.o += -DARCH=$(ARCH)
obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o

View File

@ -85,7 +85,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc32c_le_base(ctx->crc, data, length);
ctx->crc = crc32c_base(ctx->crc, data, length);
return 0;
}
@ -94,7 +94,7 @@ static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = __crc32c_le(ctx->crc, data, length);
ctx->crc = crc32c(ctx->crc, data, length);
return 0;
}
@ -108,14 +108,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
{
put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out);
put_unaligned_le32(~crc32c_base(*crcp, data, len), out);
return 0;
}
static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
put_unaligned_le32(~__crc32c_le(*crcp, data, len), out);
put_unaligned_le32(~crc32c(*crcp, data, len), out);
return 0;
}

View File

@ -1,89 +0,0 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/crc64.h>
#include <linux/module.h>
#include <crypto/internal/hash.h>
#include <linux/unaligned.h>
static int chksum_init(struct shash_desc *desc)
{
u64 *crc = shash_desc_ctx(desc);
*crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u64 *crc = shash_desc_ctx(desc);
*crc = crc64_rocksoft_generic(*crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
u64 *crc = shash_desc_ctx(desc);
put_unaligned_le64(*crc, out);
return 0;
}
static int __chksum_finup(u64 crc, const u8 *data, unsigned int len, u8 *out)
{
crc = crc64_rocksoft_generic(crc, data, len);
put_unaligned_le64(crc, out);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
u64 *crc = shash_desc_ctx(desc);
return __chksum_finup(*crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
return __chksum_finup(0, data, length, out);
}
static struct shash_alg alg = {
.digestsize = sizeof(u64),
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(u64),
.base = {
.cra_name = CRC64_ROCKSOFT_STRING,
.cra_driver_name = "crc64-rocksoft-generic",
.cra_priority = 200,
.cra_blocksize = 1,
.cra_module = THIS_MODULE,
}
};
static int __init crc64_rocksoft_init(void)
{
return crypto_register_shash(&alg);
}
static void __exit crc64_rocksoft_exit(void)
{
crypto_unregister_shash(&alg);
}
module_init(crc64_rocksoft_init);
module_exit(crc64_rocksoft_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Rocksoft model CRC64 calculation.");
MODULE_ALIAS_CRYPTO("crc64-rocksoft");
MODULE_ALIAS_CRYPTO("crc64-rocksoft-generic");

View File

@ -1,168 +0,0 @@
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform
*
* Copyright (c) 2007 Oracle Corporation. All rights reserved.
* Written by Martin K. Petersen <martin.petersen@oracle.com>
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/kernel.h>
struct chksum_desc_ctx {
__u16 crc;
};
/*
* Steps through buffer one byte at a time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0;
}
static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc_t10dif_update(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc;
return 0;
}
static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out)
{
*(__u16 *)out = crc_t10dif_generic(crc, data, len);
return 0;
}
static int __chksum_finup_arch(__u16 crc, const u8 *data, unsigned int len,
u8 *out)
{
*(__u16 *)out = crc_t10dif_update(crc, data, len);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(ctx->crc, data, len, out);
}
static int chksum_finup_arch(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup_arch(ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
return __chksum_finup(0, data, length, out);
}
static int chksum_digest_arch(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
return __chksum_finup_arch(0, data, length, out);
}
static struct shash_alg algs[] = {{
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base.cra_name = "crct10dif",
.base.cra_driver_name = "crct10dif-generic",
.base.cra_priority = 100,
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update_arch,
.final = chksum_final,
.finup = chksum_finup_arch,
.digest = chksum_digest_arch,
.descsize = sizeof(struct chksum_desc_ctx),
.base.cra_name = "crct10dif",
.base.cra_driver_name = "crct10dif-" __stringify(ARCH),
.base.cra_priority = 150,
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}};
static int num_algs;
static int __init crct10dif_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
num_algs = 1 + crc_t10dif_is_optimized();
return crypto_register_shashes(algs, num_algs);
}
static void __exit crct10dif_mod_fini(void)
{
crypto_unregister_shashes(algs, num_algs);
}
subsys_initcall(crct10dif_mod_init);
module_exit(crct10dif_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation.");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("crct10dif");
MODULE_ALIAS_CRYPTO("crct10dif-generic");

View File

@ -1654,10 +1654,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret = min(ret, tcrypt_test("ghash"));
break;
case 47:
ret = min(ret, tcrypt_test("crct10dif"));
break;
case 48:
ret = min(ret, tcrypt_test("sha3-224"));
break;
@ -2272,10 +2268,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_hash_speed("crc32c", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
fallthrough;
case 320:
test_hash_speed("crct10dif", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
fallthrough;
case 321:
test_hash_speed("poly1305", sec, poly1305_speed_template);
if (mode > 300 && mode < 400) break;

View File

@ -4759,20 +4759,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.suite = {
.hash = __VECS(crc32c_tv_template)
}
}, {
.alg = "crc64-rocksoft",
.test = alg_test_hash,
.fips_allowed = 1,
.suite = {
.hash = __VECS(crc64_rocksoft_tv_template)
}
}, {
.alg = "crct10dif",
.test = alg_test_hash,
.fips_allowed = 1,
.suite = {
.hash = __VECS(crct10dif_tv_template)
}
}, {
.alg = "ctr(aes)",
.test = alg_test_skcipher,

View File

@ -6017,309 +6017,6 @@ static const struct hash_testvec rmd160_tv_template[] = {
}
};
static const u8 zeroes[4096] = { [0 ... 4095] = 0 };
static const u8 ones[4096] = { [0 ... 4095] = 0xff };
static const struct hash_testvec crc64_rocksoft_tv_template[] = {
{
.plaintext = zeroes,
.psize = 4096,
.digest = "\x4e\xb6\x22\xeb\x67\xd3\x82\x64",
}, {
.plaintext = ones,
.psize = 4096,
.digest = "\xac\xa3\xec\x02\x73\xba\xdd\xc0",
}
};
static const struct hash_testvec crct10dif_tv_template[] = {
{
.plaintext = "abc",
.psize = 3,
.digest = (u8 *)(u16 []){ 0x443b },
}, {
.plaintext = "1234567890123456789012345678901234567890"
"123456789012345678901234567890123456789",
.psize = 79,
.digest = (u8 *)(u16 []){ 0x4b70 },
}, {
.plaintext = "abcdddddddddddddddddddddddddddddddddddddddd"
"ddddddddddddd",
.psize = 56,
.digest = (u8 *)(u16 []){ 0x9ce3 },
}, {
.plaintext = "1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890"
"1234567890123456789012345678901234567890"
"123456789012345678901234567890123456789",
.psize = 319,
.digest = (u8 *)(u16 []){ 0x44c6 },
}, {
.plaintext = "\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
"\x58\xef\x63\xfa\x91\x05\x9c\x33"
"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
"\x19\xb0\x47\xde\x52\xe9\x80\x17"
"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
"\x86\x1d\x91\x28\xbf\x33\xca\x61"
"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
"\x47\xde\x75\x0c\x80\x17\xae\x22"
"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
"\x47\xde\x52\xe9\x80\x17\x8b\x22"
"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
"\x75\x0c\x80\x17\xae\x22\xb9\x50"
"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
"\x48\xdf\x53\xea\x81\x18\x8c\x23"
"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
"\x09\xa0\x37\xce\x42\xd9\x70\x07"
"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
"\x76\x0d\x81\x18\xaf\x23\xba\x51"
"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
"\x37\xce\x65\xfc\x70\x07\x9e\x12"
"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
"\xff\x73\x0a\xa1\x15\xac\x43\xda"
"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
"\xef\x86\x1d\x91\x28\xbf\x33\xca"
"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
"\xd3\x47\xde\x75\x0c\x80\x17\xae"
"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
"\x67\xfe\x95\x09\xa0\x37\xce\x42"
"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
.psize = 2048,
.digest = (u8 *)(u16 []){ 0x23ca },
}
};
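As a hedged aside, the first vector above ("abc" -> 0x443b) can be spot-checked directly against the crc_t10dif() library helper declared in the <linux/crc-t10dif.h> hunk further down in this diff; example_check_abc() is a hypothetical name used only for illustration:
#include <linux/crc-t10dif.h>

/* Hypothetical spot check: the library helper must reproduce the "abc"
 * vector from the table above. */
static bool example_check_abc(void)
{
	return crc_t10dif((const u8 *)"abc", 3) == 0x443b;
}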
/*
* Streebog test vectors from RFC 6986 and GOST R 34.11-2012
*/


@@ -162,7 +162,7 @@ static int burst_update(struct shash_desc *desc, const u8 *d8,
if (mctx->poly == CRC32_POLY_LE)
ctx->partial = crc32_le(ctx->partial, d8, length);
else
ctx->partial = __crc32c_le(ctx->partial, d8, length);
ctx->partial = crc32c(ctx->partial, d8, length);
goto pm_out;
}


@@ -676,7 +676,7 @@ static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
int len)
{
return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
return (__force __wsum)crc32c_combine((__force __u32)csum,
(__force __u32)csum2, len);
}


@@ -714,7 +714,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
block = page_address(io->meta_page);
block->meta_size = cpu_to_le32(io->meta_offset);
crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE);
crc = crc32c(log->uuid_checksum, block, PAGE_SIZE);
block->checksum = cpu_to_le32(crc);
log->current_io = NULL;
@@ -1020,7 +1020,7 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
continue;
addr = kmap_local_page(sh->dev[i].page);
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
addr, PAGE_SIZE);
kunmap_local(addr);
}
@@ -1741,7 +1741,7 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
le64_to_cpu(mb->position) != ctx->pos)
return -EINVAL;
crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
if (stored_crc != crc)
return -EINVAL;
@@ -1780,8 +1780,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
return -ENOMEM;
r5l_recovery_create_empty_meta_block(log, page, pos, seq);
mb = page_address(page);
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
mb, PAGE_SIZE));
mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, mb, PAGE_SIZE));
if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE |
REQ_SYNC | REQ_FUA, false)) {
__free_page(page);
@@ -1976,7 +1975,7 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
r5l_recovery_read_page(log, ctx, page, log_offset);
addr = kmap_local_page(page);
checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
checksum = crc32c(log->uuid_checksum, addr, PAGE_SIZE);
kunmap_local(addr);
return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
}
@@ -2379,7 +2378,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
raid5_compute_blocknr(sh, i, 0));
addr = kmap_local_page(dev->page);
payload->checksum[0] = cpu_to_le32(
crc32c_le(log->uuid_checksum, addr,
crc32c(log->uuid_checksum, addr,
PAGE_SIZE));
kunmap_local(addr);
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
@@ -2392,7 +2391,7 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
}
}
mb->meta_size = cpu_to_le32(offset);
mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum,
mb, PAGE_SIZE));
sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false);
@@ -2885,7 +2884,7 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
continue;
addr = kmap_local_page(sh->dev[i].page);
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
addr, PAGE_SIZE);
kunmap_local(addr);
pages++;
@@ -2969,7 +2968,7 @@ static int r5l_load_log(struct r5l_log *log)
}
stored_crc = le32_to_cpu(mb->checksum);
mb->checksum = 0;
expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
expected_crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
if (stored_crc != expected_crc) {
create_super = true;
goto create;
@@ -3077,7 +3076,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
return -ENOMEM;
log->rdev = rdev;
log->need_cache_flush = bdev_write_cache(rdev->bdev);
log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
log->uuid_checksum = crc32c(~0, rdev->mddev->uuid,
sizeof(rdev->mddev->uuid));
mutex_init(&log->io_mutex);


@@ -346,7 +346,7 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) {
le32_add_cpu(&e->pp_size, PAGE_SIZE);
io->pp_size += PAGE_SIZE;
e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum),
e->checksum = cpu_to_le32(crc32c(le32_to_cpu(e->checksum),
page_address(sh->ppl_page),
PAGE_SIZE));
}
@@ -454,7 +454,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
}
pplhdr->entries_count = cpu_to_le32(io->entries_count);
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PPL_HEADER_SIZE));
/* Rewind the buffer if the current PPL is larger than the remaining space */
if (log->use_multippl &&
@@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
goto out;
}
crc = crc32c_le(crc, page_address(page), s);
crc = crc32c(crc, page_address(page), s);
pp_size -= s;
sector += s >> 9;
@@ -1052,7 +1052,7 @@ static int ppl_write_empty_header(struct ppl_log *log)
log->rdev->ppl.size, GFP_NOIO, 0);
memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PAGE_SIZE));
if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
@@ -1106,7 +1106,7 @@ static int ppl_load_distributed(struct ppl_log *log)
/* check header validity */
crc_stored = le32_to_cpu(pplhdr->checksum);
pplhdr->checksum = 0;
crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
crc = ~crc32c(~0, pplhdr, PAGE_SIZE);
if (crc_stored != crc) {
pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
@@ -1390,7 +1390,7 @@ int ppl_init_log(struct r5conf *conf)
spin_lock_init(&ppl_conf->no_mem_stripes_lock);
if (!mddev->external) {
ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
ppl_conf->signature = ~crc32c(~0, mddev->uuid, sizeof(mddev->uuid));
ppl_conf->block_size = 512;
} else {
ppl_conf->block_size =


@@ -2593,7 +2593,7 @@ void bnx2x_init_rx_mode_obj(struct bnx2x *bp,
/********************* Multicast verbs: SET, CLEAR ****************************/
static inline u8 bnx2x_mcast_bin_from_mac(u8 *mac)
{
return (crc32c_le(0, mac, ETH_ALEN) >> 24) & 0xff;
return (crc32c(0, mac, ETH_ALEN) >> 24) & 0xff;
}
struct bnx2x_mcast_mac_elem {


@@ -312,7 +312,7 @@ static void tb_cfg_print_error(struct tb_ctl *ctl, enum tb_cfg_space space,
static __be32 tb_crc(const void *data, size_t len)
{
return cpu_to_be32(~__crc32c_le(~0, data, len));
return cpu_to_be32(~crc32c(~0, data, len));
}
static void tb_ctl_pkg_free(struct ctl_pkg *pkg)


@@ -211,7 +211,7 @@ static u8 tb_crc8(u8 *data, int len)
static u32 tb_crc32(void *data, size_t len)
{
return ~__crc32c_le(~0, data, len);
return ~crc32c(~0, data, len);
}
#define TB_DROM_DATA_START 13


@@ -4,9 +4,6 @@
#include <linux/types.h>
#define CRC_T10DIF_DIGEST_SIZE 2
#define CRC_T10DIF_BLOCK_SIZE 1
u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len);
u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len);
@@ -22,13 +19,4 @@ static inline u16 crc_t10dif(const u8 *p, size_t len)
return crc_t10dif_update(0, p, len);
}
#if IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)
bool crc_t10dif_is_optimized(void);
#else
static inline bool crc_t10dif_is_optimized(void)
{
return false;
}
#endif
#endif
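A minimal sketch of using this library interface directly, with hypothetical helper names (example_pi_crc and example_pi_crc_split are made up for illustration); the one-shot and incremental forms are equivalent since crc_t10dif() is just crc_t10dif_update() seeded with 0:
#include <linux/crc-t10dif.h>

/* One-shot CRC over a whole buffer. */
static u16 example_pi_crc(const u8 *buf, size_t len)
{
	return crc_t10dif(buf, len);
}

/* Incremental CRC over two fragments; matches the one-shot result. */
static u16 example_pi_crc_split(const u8 *a, size_t alen,
				const u8 *b, size_t blen)
{
	u16 crc = crc_t10dif_update(0, a, alen);

	return crc_t10dif_update(crc, b, blen);
}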


@@ -8,33 +8,32 @@
#include <linux/types.h>
#include <linux/bitrev.h>
u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
u32 crc32_le_arch(u32 crc, const u8 *p, size_t len);
u32 crc32_le_base(u32 crc, const u8 *p, size_t len);
u32 crc32_be_arch(u32 crc, const u8 *p, size_t len);
u32 crc32_be_base(u32 crc, const u8 *p, size_t len);
u32 crc32c_arch(u32 crc, const u8 *p, size_t len);
u32 crc32c_base(u32 crc, const u8 *p, size_t len);
static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
static inline u32 crc32_le(u32 crc, const void *p, size_t len)
{
if (IS_ENABLED(CONFIG_CRC32_ARCH))
return crc32_le_arch(crc, p, len);
return crc32_le_base(crc, p, len);
}
static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
static inline u32 crc32_be(u32 crc, const void *p, size_t len)
{
if (IS_ENABLED(CONFIG_CRC32_ARCH))
return crc32_be_arch(crc, p, len);
return crc32_be_base(crc, p, len);
}
/* TODO: leading underscores should be dropped once callers have been updated */
static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
static inline u32 crc32c(u32 crc, const void *p, size_t len)
{
if (IS_ENABLED(CONFIG_CRC32_ARCH))
return crc32c_le_arch(crc, p, len);
return crc32c_le_base(crc, p, len);
return crc32c_arch(crc, p, len);
return crc32c_base(crc, p, len);
}
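For illustration, a hypothetical caller of the renamed crc32c() helper using the inverted-seed / inverted-result convention that appears in the thunderbolt and raid5 PPL hunks above (the function name is made up):
static u32 example_crc32c_csum(const void *buf, size_t len)
{
	/* Seed with all-ones and invert the result, as the callers above do. */
	return ~crc32c(~0, buf, len);
}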
/*
@@ -45,7 +44,7 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
*/
#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */
#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */
#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */
#define CRC32C_OPTIMIZATION BIT(2) /* crc32c() is optimized */
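A hypothetical runtime probe built on these flags and on crc32_optimizations(), declared just below; only a sketch of how the bits are meant to be consumed:
static bool example_crc32c_is_accelerated(void)
{
	/* Non-zero when an arch-optimized crc32c() implementation is in use. */
	return crc32_optimizations() & CRC32C_OPTIMIZATION;
}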
#if IS_ENABLED(CONFIG_CRC32_ARCH)
u32 crc32_optimizations(void);
#else
@@ -70,36 +69,34 @@ static inline u32 crc32_optimizations(void) { return 0; }
* with the same initializer as crc1, and crc2 seed was 0. See
* also crc32_combine_test().
*/
u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len);
u32 crc32_le_shift(u32 crc, size_t len);
static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
{
return crc32_le_shift(crc1, len2) ^ crc2;
}
u32 crc32c_shift(u32 crc, size_t len);
/**
* __crc32c_le_combine - Combine two crc32c check values into one. For two
* sequences of bytes, seq1 and seq2 with lengths len1
* and len2, __crc32c_le() check values were calculated
* for each, crc1 and crc2.
* crc32c_combine - Combine two crc32c check values into one. For two sequences
* of bytes, seq1 and seq2 with lengths len1 and len2, crc32c()
* check values were calculated for each, crc1 and crc2.
*
* @crc1: crc32c of the first block
* @crc2: crc32c of the second block
* @len2: length of the second block
*
* Return: The __crc32c_le() check value of seq1 and seq2 concatenated,
* requiring only crc1, crc2, and len2. Note: If seq_full denotes
* the concatenated memory area of seq1 with seq2, and crc_full
* the __crc32c_le() value of seq_full, then crc_full ==
* __crc32c_le_combine(crc1, crc2, len2) when crc_full was
* seeded with the same initializer as crc1, and crc2 seed
* was 0. See also crc32c_combine_test().
* Return: The crc32c() check value of seq1 and seq2 concatenated, requiring
* only crc1, crc2, and len2. Note: If seq_full denotes the concatenated
* memory area of seq1 with seq2, and crc_full the crc32c() value of
* seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when
* crc_full was seeded with the same initializer as crc1, and crc2 seed
* was 0. See also crc_combine_test().
*/
u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len);
static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2)
{
return __crc32c_le_shift(crc1, len2) ^ crc2;
return crc32c_shift(crc1, len2) ^ crc2;
}
#define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length)
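A hypothetical sanity check of the combine property documented above: the CRC of a concatenated buffer must equal the combination of the per-fragment CRCs when crc1 shares the full CRC's seed and crc2 is seeded with 0 (the function name and the ~0 seed choice are assumptions for the example):
static bool example_crc32c_combine_check(const u8 *buf, size_t len1, size_t len2)
{
	u32 full = crc32c(~0, buf, len1 + len2);
	u32 crc1 = crc32c(~0, buf, len1);
	u32 crc2 = crc32c(0, buf + len1, len2);

	return full == crc32c_combine(crc1, crc2, len2);
}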


@@ -4,12 +4,4 @@
#include <linux/crc32.h>
static inline u32 crc32c(u32 crc, const void *address, unsigned int length)
{
return __crc32c_le(crc, address, length);
}
/* This macro exists for backwards-compatibility. */
#define crc32c_le crc32c
#endif /* _LINUX_CRC32C_H */
