From 2468b0e3d5659dfde77f081f266e1111a981efb8 Mon Sep 17 00:00:00 2001 From: Tianyang Zhang Date: Wed, 14 May 2025 22:17:43 +0800 Subject: [PATCH 1/6] LoongArch: Prevent cond_resched() occurring within kernel-fpu When CONFIG_PREEMPT_COUNT is not configured (i.e. CONFIG_PREEMPT_NONE/ CONFIG_PREEMPT_VOLUNTARY), preempt_disable() / preempt_enable() merely acts as a barrier(). However, in these cases cond_resched() can still trigger a context switch and modify the CSR.EUEN, resulting in do_fpu() exception being activated within the kernel-fpu critical sections, as demonstrated in the following path: dcn32_calculate_wm_and_dlg() DC_FP_START() dcn32_calculate_wm_and_dlg_fpu() dcn32_find_dummy_latency_index_for_fw_based_mclk_switch() dcn32_internal_validate_bw() dcn32_enable_phantom_stream() dc_create_stream_for_sink() kzalloc(GFP_KERNEL) __kmem_cache_alloc_node() __cond_resched() DC_FP_END() This patch is similar to commit d02198550423a0b (x86/fpu: Improve crypto performance by making kernel-mode FPU reliably usable in softirqs). It uses local_bh_disable() instead of preempt_disable() for non-RT kernels so it can avoid the cond_resched() issue, and also extend the kernel-fpu application scenarios to the softirq context. Cc: stable@vger.kernel.org Signed-off-by: Tianyang Zhang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/kfpu.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/kfpu.c b/arch/loongarch/kernel/kfpu.c index ec5b28e570c9..4c476904227f 100644 --- a/arch/loongarch/kernel/kfpu.c +++ b/arch/loongarch/kernel/kfpu.c @@ -18,11 +18,28 @@ static unsigned int euen_mask = CSR_EUEN_FPEN; static DEFINE_PER_CPU(bool, in_kernel_fpu); static DEFINE_PER_CPU(unsigned int, euen_current); +static inline void fpregs_lock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_disable(); + else + local_bh_disable(); +} + +static inline void fpregs_unlock(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + preempt_enable(); + else + local_bh_enable(); +} + void kernel_fpu_begin(void) { unsigned int *euen_curr; - preempt_disable(); + if (!irqs_disabled()) + fpregs_lock(); WARN_ON(this_cpu_read(in_kernel_fpu)); @@ -73,7 +90,8 @@ void kernel_fpu_end(void) this_cpu_write(in_kernel_fpu, false); - preempt_enable(); + if (!irqs_disabled()) + fpregs_unlock(); } EXPORT_SYMBOL_GPL(kernel_fpu_end); From 90436d234230e9a950ccd87831108b688b27a234 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 May 2025 22:17:43 +0800 Subject: [PATCH 2/6] LoongArch: Fix MAX_REG_OFFSET calculation Fix MAX_REG_OFFSET calculation, make it point to the last register in 'struct pt_regs' and not to the marker itself, which could allow regs_get_register() to return an invalid offset. Cc: stable@vger.kernel.org Fixes: 803b0fc5c3f2baa6e5 ("LoongArch: Add process management") Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index a5b63c84f854..e5d21e836d99 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -55,7 +55,7 @@ static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long v /* Query offset/name of register from its name/offset */ extern int regs_query_register_offset(const char *name); -#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last)) +#define MAX_REG_OFFSET (offsetof(struct pt_regs, __last) - sizeof(unsigned long)) /** * regs_get_register() - get register value from its offset From 3e245b7b74c3a2ead5fa4bad27cc275284c75189 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 May 2025 22:17:52 +0800 Subject: [PATCH 3/6] LoongArch: Move __arch_cpu_idle() to .cpuidle.text section Now arch_cpu_idle() is annotated with __cpuidle which means it is in the .cpuidle.text section, but __arch_cpu_idle() isn't. Thus, fix the missing .cpuidle.text section assignment for __arch_cpu_idle() in order to correct backtracing with nmi_backtrace(). The principle is similar to the commit 97c8580e85cf81c ("MIPS: Annotate cpu_wait implementations with __cpuidle") Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen --- arch/loongarch/kernel/genex.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 4f0912141781..733a7665e434 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -16,6 +16,7 @@ #include #include + .section .cpuidle.text, "ax" .align 5 SYM_FUNC_START(__arch_cpu_idle) /* start of idle interrupt region */ @@ -31,14 +32,16 @@ SYM_FUNC_START(__arch_cpu_idle) */ idle 0 /* end of idle interrupt region */ -1: jr ra +idle_exit: + jr ra SYM_FUNC_END(__arch_cpu_idle) + .previous SYM_CODE_START(handle_vint) UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL - la_abs t1, 1b + la_abs t1, idle_exit LONG_L t0, sp, PT_ERA /* 3 instructions idle interrupt region */ ori t0, t0, 0b1100 From ceb9155d058a11242aa0572875c44e9713b1a2be Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 14 May 2025 22:17:52 +0800 Subject: [PATCH 4/6] LoongArch: Save and restore CSR.CNTC for hibernation Save and restore CSR.CNTC for hibernation which is similar to suspend. For host this is unnecessary because sched clock is ensured continuous, but for kvm guest sched clock isn't enough because rdtime.d should also be continuous. Host::rdtime.d = Host::CSR.CNTC + counter Guest::rdtime.d = Host::CSR.CNTC + Host::CSR.GCNTC + Guest::CSR.CNTC + counter so, Guest::rdtime.d = Host::rdtime.d + Host::CSR.GCNTC + Guest::CSR.CNTC To ensure Guest::rdtime.d continuous, Host::rdtime.d should be at first continuous, while Host::CSR.GCNTC / Guest::CSR.CNTC is maintained by KVM. Cc: stable@vger.kernel.org Signed-off-by: Xianglai Li Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 2 +- arch/loongarch/power/hibernate.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index e2d3bfeb6366..bc75a3a69fc8 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -111,7 +111,7 @@ static unsigned long __init get_loops_per_jiffy(void) return lpj; } -static long init_offset __nosavedata; +static long init_offset; void save_counter(void) { diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c index 1e0590542f98..e7b7346592cb 100644 --- a/arch/loongarch/power/hibernate.c +++ b/arch/loongarch/power/hibernate.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -14,6 +15,7 @@ struct pt_regs saved_regs; void save_processor_state(void) { + save_counter(); saved_crmd = csr_read32(LOONGARCH_CSR_CRMD); saved_prmd = csr_read32(LOONGARCH_CSR_PRMD); saved_euen = csr_read32(LOONGARCH_CSR_EUEN); @@ -26,6 +28,7 @@ void save_processor_state(void) void restore_processor_state(void) { + sync_counter(); csr_write32(saved_crmd, LOONGARCH_CSR_CRMD); csr_write32(saved_prmd, LOONGARCH_CSR_PRMD); csr_write32(saved_euen, LOONGARCH_CSR_EUEN); From 0b326b2371f94e798137cc1a3c5c2eef2bc69061 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 14 May 2025 22:18:10 +0800 Subject: [PATCH 5/6] LoongArch: uprobes: Remove user_{en,dis}able_single_step() When executing the "perf probe" and "perf stat" test cases about some cryptographic algorithm, the output shows that "Trace/breakpoint trap". This is because it uses the software singlestep breakpoint for uprobes on LoongArch, and no need to use the hardware singlestep. So just remove the related function call to user_{en,dis}able_single_step() for uprobes on LoongArch. How to reproduce: Please make sure CONFIG_UPROBE_EVENTS is set and openssl supports sm2 algorithm, then execute the following command. cd tools/perf && make ./perf probe -x /usr/lib64/libcrypto.so BN_mod_mul_montgomery ./perf stat -e probe_libcrypto:BN_mod_mul_montgomery openssl speed sm2 Cc: stable@vger.kernel.org Fixes: 19bc6cb64092 ("LoongArch: Add uprobes support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/kernel/uprobes.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c index 87abc7137b73..0ab9d8d631c4 100644 --- a/arch/loongarch/kernel/uprobes.c +++ b/arch/loongarch/kernel/uprobes.c @@ -42,7 +42,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) utask->autask.saved_trap_nr = current->thread.trap_nr; current->thread.trap_nr = UPROBE_TRAP_NR; instruction_pointer_set(regs, utask->xol_vaddr); - user_enable_single_step(current); return 0; } @@ -59,8 +58,6 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) else instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); - user_disable_single_step(current); - return 0; } @@ -70,7 +67,6 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) current->thread.trap_nr = utask->autask.saved_trap_nr; instruction_pointer_set(regs, utask->vaddr); - user_disable_single_step(current); } bool arch_uprobe_xol_was_trapped(struct task_struct *t) From 12614f794274f63fbdfe76771b2b332077d63848 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 14 May 2025 22:18:10 +0800 Subject: [PATCH 6/6] LoongArch: uprobes: Remove redundant code about resume_era arch_uprobe_skip_sstep() returns true if instruction was emulated, that is to say, there is no need to single step for the emulated instructions. regs->csr_era will point to the destination address directly after the exception, so the resume_era related code is redundant, just remove them. Cc: stable@vger.kernel.org Fixes: 19bc6cb64092 ("LoongArch: Add uprobes support") Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/uprobes.h | 1 - arch/loongarch/kernel/uprobes.c | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/loongarch/include/asm/uprobes.h b/arch/loongarch/include/asm/uprobes.h index 99a0d198927f..025fc3f0a102 100644 --- a/arch/loongarch/include/asm/uprobes.h +++ b/arch/loongarch/include/asm/uprobes.h @@ -15,7 +15,6 @@ typedef u32 uprobe_opcode_t; #define UPROBE_XOLBP_INSN __emit_break(BRK_UPROBE_XOLBP) struct arch_uprobe { - unsigned long resume_era; u32 insn[2]; u32 ixol[2]; bool simulate; diff --git a/arch/loongarch/kernel/uprobes.c b/arch/loongarch/kernel/uprobes.c index 0ab9d8d631c4..6022eb0f71db 100644 --- a/arch/loongarch/kernel/uprobes.c +++ b/arch/loongarch/kernel/uprobes.c @@ -52,11 +52,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); current->thread.trap_nr = utask->autask.saved_trap_nr; - - if (auprobe->simulate) - instruction_pointer_set(regs, auprobe->resume_era); - else - instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); + instruction_pointer_set(regs, utask->vaddr + LOONGARCH_INSN_SIZE); return 0; } @@ -86,7 +82,6 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) insn.word = auprobe->insn[0]; arch_simulate_insn(insn, regs); - auprobe->resume_era = regs->csr_era; return true; }