x86: memcpy, clean up
Impact: cleanup. Make this file more readable by bringing it more in line with the usual kernel style. Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
+81
-55
@@ -1,30 +1,38 @@
|
|||||||
/* Copyright 2002 Andi Kleen */
|
/* Copyright 2002 Andi Kleen */
|
||||||
|
|
||||||
#include <linux/linkage.h>
|
#include <linux/linkage.h>
|
||||||
#include <asm/dwarf2.h>
|
|
||||||
#include <asm/cpufeature.h>
|
#include <asm/cpufeature.h>
|
||||||
|
#include <asm/dwarf2.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* memcpy - Copy a memory block.
|
* memcpy - Copy a memory block.
|
||||||
*
|
*
|
||||||
* Input:
|
* Input:
|
||||||
* rdi destination
|
* rdi destination
|
||||||
* rsi source
|
* rsi source
|
||||||
* rdx count
|
* rdx count
|
||||||
*
|
*
|
||||||
* Output:
|
* Output:
|
||||||
* rax original destination
|
* rax original destination
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* memcpy_c() - fast string ops (REP MOVSQ) based variant.
|
||||||
|
*
|
||||||
|
* Calls to this get patched into the kernel image via the
|
||||||
|
* alternative instructions framework:
|
||||||
|
*/
|
||||||
ALIGN
|
ALIGN
|
||||||
memcpy_c:
|
memcpy_c:
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
movq %rdi,%rax
|
movq %rdi, %rax
|
||||||
movl %edx,%ecx
|
|
||||||
shrl $3,%ecx
|
movl %edx, %ecx
|
||||||
andl $7,%edx
|
shrl $3, %ecx
|
||||||
|
andl $7, %edx
|
||||||
rep movsq
|
rep movsq
|
||||||
movl %edx,%ecx
|
movl %edx, %ecx
|
||||||
rep movsb
|
rep movsb
|
||||||
ret
|
ret
|
||||||
CFI_ENDPROC
|
CFI_ENDPROC
|
||||||
@@ -33,92 +41,110 @@ ENDPROC(memcpy_c)
|
|||||||
ENTRY(__memcpy)
|
ENTRY(__memcpy)
|
||||||
ENTRY(memcpy)
|
ENTRY(memcpy)
|
||||||
CFI_STARTPROC
|
CFI_STARTPROC
|
||||||
movq %rdi,%rax
|
|
||||||
|
|
||||||
movl %edx,%ecx
|
/*
|
||||||
shrl $6,%ecx
|
* Put the number of full 64-byte blocks into %ecx.
|
||||||
|
* Tail portion is handled at the end:
|
||||||
|
*/
|
||||||
|
movq %rdi, %rax
|
||||||
|
movl %edx, %ecx
|
||||||
|
shrl $6, %ecx
|
||||||
jz .Lhandle_tail
|
jz .Lhandle_tail
|
||||||
|
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lloop_64:
|
.Lloop_64:
|
||||||
|
/*
|
||||||
|
* We decrement the loop index here - and the zero-flag is
|
||||||
|
* checked at the end of the loop (instructions in between do
|
||||||
|
* not change the zero flag):
|
||||||
|
*/
|
||||||
decl %ecx
|
decl %ecx
|
||||||
|
|
||||||
movq (%rsi),%r11
|
/*
|
||||||
movq 8(%rsi),%r8
|
* Move in blocks of 4x16 bytes:
|
||||||
|
*/
|
||||||
|
movq 0*8(%rsi), %r11
|
||||||
|
movq 1*8(%rsi), %r8
|
||||||
|
movq %r11, 0*8(%rdi)
|
||||||
|
movq %r8, 1*8(%rdi)
|
||||||
|
|
||||||
movq %r11,(%rdi)
|
movq 2*8(%rsi), %r9
|
||||||
movq %r8,1*8(%rdi)
|
movq 3*8(%rsi), %r10
|
||||||
|
movq %r9, 2*8(%rdi)
|
||||||
|
movq %r10, 3*8(%rdi)
|
||||||
|
|
||||||
movq 2*8(%rsi),%r9
|
movq 4*8(%rsi), %r11
|
||||||
movq 3*8(%rsi),%r10
|
movq 5*8(%rsi), %r8
|
||||||
|
movq %r11, 4*8(%rdi)
|
||||||
|
movq %r8, 5*8(%rdi)
|
||||||
|
|
||||||
movq %r9,2*8(%rdi)
|
movq 6*8(%rsi), %r9
|
||||||
movq %r10,3*8(%rdi)
|
movq 7*8(%rsi), %r10
|
||||||
|
movq %r9, 6*8(%rdi)
|
||||||
|
movq %r10, 7*8(%rdi)
|
||||||
|
|
||||||
movq 4*8(%rsi),%r11
|
leaq 64(%rsi), %rsi
|
||||||
movq 5*8(%rsi),%r8
|
leaq 64(%rdi), %rdi
|
||||||
|
|
||||||
movq %r11,4*8(%rdi)
|
|
||||||
movq %r8,5*8(%rdi)
|
|
||||||
|
|
||||||
movq 6*8(%rsi),%r9
|
|
||||||
movq 7*8(%rsi),%r10
|
|
||||||
|
|
||||||
movq %r9,6*8(%rdi)
|
|
||||||
movq %r10,7*8(%rdi)
|
|
||||||
|
|
||||||
leaq 64(%rsi),%rsi
|
|
||||||
leaq 64(%rdi),%rdi
|
|
||||||
jnz .Lloop_64
|
jnz .Lloop_64
|
||||||
|
|
||||||
.Lhandle_tail:
|
.Lhandle_tail:
|
||||||
movl %edx,%ecx
|
movl %edx, %ecx
|
||||||
andl $63,%ecx
|
andl $63, %ecx
|
||||||
shrl $3,%ecx
|
shrl $3, %ecx
|
||||||
jz .Lhandle_7
|
jz .Lhandle_7
|
||||||
|
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lloop_8:
|
.Lloop_8:
|
||||||
decl %ecx
|
decl %ecx
|
||||||
movq (%rsi),%r8
|
movq (%rsi), %r8
|
||||||
movq %r8,(%rdi)
|
movq %r8, (%rdi)
|
||||||
leaq 8(%rdi),%rdi
|
leaq 8(%rdi), %rdi
|
||||||
leaq 8(%rsi),%rsi
|
leaq 8(%rsi), %rsi
|
||||||
jnz .Lloop_8
|
jnz .Lloop_8
|
||||||
|
|
||||||
.Lhandle_7:
|
.Lhandle_7:
|
||||||
movl %edx,%ecx
|
movl %edx, %ecx
|
||||||
andl $7,%ecx
|
andl $7, %ecx
|
||||||
jz .Lende
|
jz .Lend
|
||||||
|
|
||||||
.p2align 4
|
.p2align 4
|
||||||
.Lloop_1:
|
.Lloop_1:
|
||||||
movb (%rsi),%r8b
|
movb (%rsi), %r8b
|
||||||
movb %r8b,(%rdi)
|
movb %r8b, (%rdi)
|
||||||
incq %rdi
|
incq %rdi
|
||||||
incq %rsi
|
incq %rsi
|
||||||
decl %ecx
|
decl %ecx
|
||||||
jnz .Lloop_1
|
jnz .Lloop_1
|
||||||
|
|
||||||
.Lende:
|
.Lend:
|
||||||
ret
|
ret
|
||||||
CFI_ENDPROC
|
CFI_ENDPROC
|
||||||
ENDPROC(memcpy)
|
ENDPROC(memcpy)
|
||||||
ENDPROC(__memcpy)
|
ENDPROC(__memcpy)
|
||||||
|
|
||||||
/* Some CPUs run faster using the string copy instructions.
|
/*
|
||||||
It is also a lot simpler. Use this when possible */
|
* Some CPUs run faster using the string copy instructions.
|
||||||
|
* It is also a lot simpler. Use this when possible:
|
||||||
|
*/
|
||||||
|
|
||||||
.section .altinstr_replacement,"ax"
|
.section .altinstr_replacement, "ax"
|
||||||
1: .byte 0xeb /* jmp <disp8> */
|
1: .byte 0xeb /* jmp <disp8> */
|
||||||
.byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
|
.byte (memcpy_c - memcpy) - (2f - 1b) /* offset */
|
||||||
2:
|
2:
|
||||||
.previous
|
.previous
|
||||||
.section .altinstructions,"a"
|
|
||||||
|
.section .altinstructions, "a"
|
||||||
.align 8
|
.align 8
|
||||||
.quad memcpy
|
.quad memcpy
|
||||||
.quad 1b
|
.quad 1b
|
||||||
.byte X86_FEATURE_REP_GOOD
|
.byte X86_FEATURE_REP_GOOD
|
||||||
/* Replace only beginning, memcpy is used to apply alternatives, so it
|
|
||||||
* is silly to overwrite itself with nops - reboot is only outcome... */
|
/*
|
||||||
|
* Replace only beginning, memcpy is used to apply alternatives,
|
||||||
|
* so it is silly to overwrite itself with nops - reboot is the
|
||||||
|
* only outcome...
|
||||||
|
*/
|
||||||
.byte 2b - 1b
|
.byte 2b - 1b
|
||||||
.byte 2b - 1b
|
.byte 2b - 1b
|
||||||
.previous
|
.previous
|
||||||
|
|||||||
Reference in New Issue
Block a user