x86/crc32: update prototype for crc_pcl()
- Change the len parameter from unsigned int to size_t, so that the library function, which takes a size_t, can safely use this code.
- Rename the function to crc32c_x86_3way(), which is a much clearer name.
- Move the crc parameter to the front, as this is the usual convention.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20241202010844.144356-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
This commit is contained in:
@@ -41,8 +41,7 @@
|
||||
*/
|
||||
#define CRC32C_PCL_BREAKEVEN 512
|
||||
|
||||
asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len,
|
||||
unsigned int crc_init);
|
||||
asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
|
||||
@@ -159,7 +158,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
|
||||
*/
|
||||
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
|
||||
kernel_fpu_begin();
|
||||
*crcp = crc_pcl(data, len, *crcp);
|
||||
*crcp = crc32c_x86_3way(*crcp, data, len);
|
||||
kernel_fpu_end();
|
||||
} else
|
||||
*crcp = crc32c_intel_le_hw(*crcp, data, len);
|
||||
@@ -171,7 +170,7 @@ static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
|
||||
{
|
||||
if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
|
||||
kernel_fpu_begin();
|
||||
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
|
||||
*(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
|
||||
kernel_fpu_end();
|
||||
} else
|
||||
*(__le32 *)out =
|
||||
|
||||
@@ -52,15 +52,16 @@
|
||||
# regular CRC code that does not interleave the CRC instructions.
|
||||
#define SMALL_SIZE 200
|
||||
|
||||
# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int crc_init);
|
||||
# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
|
||||
|
||||
.text
|
||||
SYM_FUNC_START(crc_pcl)
|
||||
#define bufp %rdi
|
||||
#define bufp_d %edi
|
||||
#define len %esi
|
||||
#define crc_init %edx
|
||||
#define crc_init_q %rdx
|
||||
SYM_FUNC_START(crc32c_x86_3way)
|
||||
#define crc0 %edi
|
||||
#define crc0_q %rdi
|
||||
#define bufp %rsi
|
||||
#define bufp_d %esi
|
||||
#define len %rdx
|
||||
#define len_dw %edx
|
||||
#define n_misaligned %ecx /* overlaps chunk_bytes! */
|
||||
#define n_misaligned_q %rcx
|
||||
#define chunk_bytes %ecx /* overlaps n_misaligned! */
|
||||
@@ -85,9 +86,9 @@ SYM_FUNC_START(crc_pcl)
|
||||
.Ldo_align:
|
||||
movq (bufp), %rax
|
||||
add n_misaligned_q, bufp
|
||||
sub n_misaligned, len
|
||||
sub n_misaligned_q, len
|
||||
.Lalign_loop:
|
||||
crc32b %al, crc_init # compute crc32 of 1-byte
|
||||
crc32b %al, crc0 # compute crc32 of 1-byte
|
||||
shr $8, %rax # get next byte
|
||||
dec n_misaligned
|
||||
jne .Lalign_loop
|
||||
@@ -102,7 +103,7 @@ SYM_FUNC_START(crc_pcl)
|
||||
|
||||
.Lpartial_block:
|
||||
# Compute floor(len / 24) to get num qwords to process from each lane.
|
||||
imul $2731, len, %eax # 2731 = ceil(2^16 / 24)
|
||||
imul $2731, len_dw, %eax # 2731 = ceil(2^16 / 24)
|
||||
shr $16, %eax
|
||||
jmp .Lcrc_3lanes
|
||||
|
||||
@@ -125,16 +126,16 @@ SYM_FUNC_START(crc_pcl)
|
||||
# Unroll the loop by a factor of 4 to reduce the overhead of the loop
|
||||
# bookkeeping instructions, which can compete with crc32q for the ALUs.
|
||||
.Lcrc_3lanes_4x_loop:
|
||||
crc32q (bufp), crc_init_q
|
||||
crc32q (bufp), crc0_q
|
||||
crc32q (bufp,chunk_bytes_q), crc1
|
||||
crc32q (bufp,chunk_bytes_q,2), crc2
|
||||
crc32q 8(bufp), crc_init_q
|
||||
crc32q 8(bufp), crc0_q
|
||||
crc32q 8(bufp,chunk_bytes_q), crc1
|
||||
crc32q 8(bufp,chunk_bytes_q,2), crc2
|
||||
crc32q 16(bufp), crc_init_q
|
||||
crc32q 16(bufp), crc0_q
|
||||
crc32q 16(bufp,chunk_bytes_q), crc1
|
||||
crc32q 16(bufp,chunk_bytes_q,2), crc2
|
||||
crc32q 24(bufp), crc_init_q
|
||||
crc32q 24(bufp), crc0_q
|
||||
crc32q 24(bufp,chunk_bytes_q), crc1
|
||||
crc32q 24(bufp,chunk_bytes_q,2), crc2
|
||||
add $32, bufp
|
||||
@@ -146,7 +147,7 @@ SYM_FUNC_START(crc_pcl)
|
||||
jz .Lcrc_3lanes_last_qword
|
||||
|
||||
.Lcrc_3lanes_1x_loop:
|
||||
crc32q (bufp), crc_init_q
|
||||
crc32q (bufp), crc0_q
|
||||
crc32q (bufp,chunk_bytes_q), crc1
|
||||
crc32q (bufp,chunk_bytes_q,2), crc2
|
||||
add $8, bufp
|
||||
@@ -154,7 +155,7 @@ SYM_FUNC_START(crc_pcl)
|
||||
jnz .Lcrc_3lanes_1x_loop
|
||||
|
||||
.Lcrc_3lanes_last_qword:
|
||||
crc32q (bufp), crc_init_q
|
||||
crc32q (bufp), crc0_q
|
||||
crc32q (bufp,chunk_bytes_q), crc1
|
||||
# SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet
|
||||
|
||||
@@ -165,9 +166,9 @@ SYM_FUNC_START(crc_pcl)
|
||||
lea (K_table-8)(%rip), %rax # first entry is for idx 1
|
||||
pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2
|
||||
lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
|
||||
sub %eax, len # len -= chunk_bytes * 3
|
||||
sub %rax, len # len -= chunk_bytes * 3
|
||||
|
||||
movq crc_init_q, %xmm1 # CRC for block 1
|
||||
movq crc0_q, %xmm1 # CRC for block 1
|
||||
pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2
|
||||
|
||||
movq crc1, %xmm2 # CRC for block 2
|
||||
@@ -176,8 +177,8 @@ SYM_FUNC_START(crc_pcl)
|
||||
pxor %xmm2,%xmm1
|
||||
movq %xmm1, %rax
|
||||
xor (bufp,chunk_bytes_q,2), %rax
|
||||
mov crc2, crc_init_q
|
||||
crc32 %rax, crc_init_q
|
||||
mov crc2, crc0_q
|
||||
crc32 %rax, crc0_q
|
||||
lea 8(bufp,chunk_bytes_q,2), bufp
|
||||
|
||||
################################################################
|
||||
@@ -193,34 +194,34 @@ SYM_FUNC_START(crc_pcl)
|
||||
## 6) Process any remainder without interleaving:
|
||||
#######################################################################
|
||||
.Lsmall:
|
||||
test len, len
|
||||
test len_dw, len_dw
|
||||
jz .Ldone
|
||||
mov len, %eax
|
||||
mov len_dw, %eax
|
||||
shr $3, %eax
|
||||
jz .Ldo_dword
|
||||
.Ldo_qwords:
|
||||
crc32q (bufp), crc_init_q
|
||||
crc32q (bufp), crc0_q
|
||||
add $8, bufp
|
||||
dec %eax
|
||||
jnz .Ldo_qwords
|
||||
.Ldo_dword:
|
||||
test $4, len
|
||||
test $4, len_dw
|
||||
jz .Ldo_word
|
||||
crc32l (bufp), crc_init
|
||||
crc32l (bufp), crc0
|
||||
add $4, bufp
|
||||
.Ldo_word:
|
||||
test $2, len
|
||||
test $2, len_dw
|
||||
jz .Ldo_byte
|
||||
crc32w (bufp), crc_init
|
||||
crc32w (bufp), crc0
|
||||
add $2, bufp
|
||||
.Ldo_byte:
|
||||
test $1, len
|
||||
test $1, len_dw
|
||||
jz .Ldone
|
||||
crc32b (bufp), crc_init
|
||||
crc32b (bufp), crc0
|
||||
.Ldone:
|
||||
mov crc_init, %eax
|
||||
mov crc0, %eax
|
||||
RET
|
||||
SYM_FUNC_END(crc_pcl)
|
||||
SYM_FUNC_END(crc32c_x86_3way)
|
||||
|
||||
.section .rodata, "a", @progbits
|
||||
################################################################
|
||||
|
||||
Reference in New Issue
Block a user