From a784101f77b1bef4b40f4ad68af3f54fcfa5321b Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 22 Jun 2022 15:52:35 +1000 Subject: [PATCH 01/10] KVM: PPC: Book3s: Fix warning about xics_rm_h_xirr_x This fixes "no previous prototype": arch/powerpc/kvm/book3s_hv_rm_xics.c:482:15: warning: no previous prototype for 'xics_rm_h_xirr_x' [-Wmissing-prototypes] Reported by the kernel test robot. Fixes: b22af9041927 ("KVM: PPC: Book3s: Remove real mode interrupt controller hcalls handlers") Signed-off-by: Alexey Kardashevskiy Reviewed-by: Greg Kurz Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220622055235.1139204-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_xics.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 8e4c79e2fcd8..08fb0843faf5 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -143,6 +143,7 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, } extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu); +extern unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu); extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, unsigned long mfrr); extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); From 9981bace85d816ed8724ac46e49285e8488d29e6 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:50 -0300 Subject: [PATCH 02/10] KVM: PPC: Book3S HV: Fix "rm_exit" entry in debugfs timings At debugfs/kvm//vcpu0/timings we show how long each part of the code takes to run: $ cat /sys/kernel/debug/kvm/*-*/vcpu0/timings rm_entry: 123785 49398892 118 4898 rm_intr: 123780 6075890 22 390 rm_exit: 0 0 0 0 <-- NOK guest: 123780 46732919988 402 9997638 cede: 0 0 0 0 <-- OK, no cede napping in P9 The "rm_exit" is always showing zero because it is the last one and end_timing does not increment the counter of the previous entry. We can fix it by calling accumulate_time again instead of end_timing. That way the counter gets incremented. The rest of the arithmetic can be ignored because there are no timing points after this and the accumulators are reset before the next round. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-2-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv_p9_entry.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 112a09b33328..7f88be386b27 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -438,15 +438,6 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING -static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) -{ - struct kvmppc_vcore *vc = vcpu->arch.vcore; - u64 tb = mftb() - vc->tb_offset_applied; - - vcpu->arch.cur_activity = next; - vcpu->arch.cur_tb_start = tb; -} - static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; @@ -478,8 +469,8 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator curr->seqcount = seq + 2; } -#define start_timing(vcpu, next) __start_timing(vcpu, next) -#define end_timing(vcpu) __start_timing(vcpu, NULL) +#define start_timing(vcpu, next) __accumulate_time(vcpu, next) +#define end_timing(vcpu) __accumulate_time(vcpu, NULL) #define accumulate_time(vcpu, next) __accumulate_time(vcpu, next) #else #define start_timing(vcpu, next) do {} while (0) From 3f8ed993be3cf154a91d9ab5e80470c6c755adbe Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:51 -0300 Subject: [PATCH 03/10] KVM: PPC: Book3S HV: Add a new config for P8 debug timing Turn the existing Kconfig KVM_BOOK3S_HV_EXIT_TIMING into KVM_BOOK3S_HV_P8_TIMING in preparation for the addition of a new config for P9 timings. This applies only to P8 code, the generic timing code is still kept under KVM_BOOK3S_HV_EXIT_TIMING. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-3-farosas@linux.ibm.com --- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/Kconfig | 6 +++++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 24 ++++++++++++------------ 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index eec536aef83a..8c10f536e478 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -379,7 +379,7 @@ int main(void) OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2); OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3); #endif -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry); OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr); OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit); diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index ddd88179110a..73f8277df7d1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -128,7 +128,11 @@ config KVM_BOOK3S_64_PR and system calls on the host. config KVM_BOOK3S_HV_EXIT_TIMING - bool "Detailed timing for hypervisor real-mode code" + bool + +config KVM_BOOK3S_HV_P8_TIMING + bool "Detailed timing for hypervisor real-mode code (for POWER8)" + select KVM_BOOK3S_HV_EXIT_TIMING depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS help Calculate time taken for each vcpu in the real-mode guest entry, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0fc0e68d20d0..7ded202bf995 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -237,14 +237,14 @@ kvm_novcpu_wakeup: cmpdi r4, 0 beq kvmppc_primary_no_guest -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMENTRY bl kvmhv_start_timing #endif b kvmppc_got_guest kvm_novcpu_exit: -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 beq 13f @@ -523,7 +523,7 @@ kvmppc_hv_entry: li r6, KVM_GUEST_MODE_HOST_HV stb r6, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Store initial timestamp */ cmpdi r4, 0 beq 1f @@ -894,7 +894,7 @@ fast_guest_return: li r9, KVM_GUEST_MODE_GUEST_HV stb r9, HSTATE_IN_GUEST(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Accumulate timing */ addi r3, r4, VCPU_TB_GUEST bl kvmhv_accumulate_time @@ -945,7 +945,7 @@ secondary_too_late: cmpdi r4, 0 beq 11f stw r12, VCPU_TRAP(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -959,7 +959,7 @@ hdec_soon: li r12, BOOK3S_INTERRUPT_HV_DECREMENTER 12: stw r12, VCPU_TRAP(r4) mr r9, r4 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMEXIT bl kvmhv_accumulate_time #endif @@ -1056,7 +1056,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) li r0, MSR_RI mtmsrd r0, 1 -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMINTR mr r4, r9 bl kvmhv_accumulate_time @@ -1135,7 +1135,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */ -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r9, VCPU_TB_RMEXIT mr r4, r9 bl kvmhv_accumulate_time @@ -1495,7 +1495,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_LPCR,r8 isync -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* Finish timing, if we have a vcpu */ ld r4, HSTATE_KVM_VCPU(r13) cmpdi r4, 0 @@ -2153,7 +2153,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r4, HSTATE_KVM_VCPU(r13) std r3, VCPU_DEC_EXPIRES(r4) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING ld r4, HSTATE_KVM_VCPU(r13) addi r3, r4, VCPU_TB_CEDE bl kvmhv_accumulate_time @@ -2221,7 +2221,7 @@ kvm_end_cede: /* get vcpu pointer */ ld r4, HSTATE_KVM_VCPU(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING addi r3, r4, VCPU_TB_RMINTR bl kvmhv_accumulate_time #endif @@ -2961,7 +2961,7 @@ kvmppc_fix_pmao: isync blr -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING /* * Start timing an activity * r3 = pointer to time accumulation struct, r4 = vcpu From c3fa64c99c61d99631a8e06a6cf991c35c98ec7d Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:52 -0300 Subject: [PATCH 04/10] KVM: PPC: Book3S HV: Decouple the debug timing from the P8 entry path We are currently doing the timing for debug purposes of the P9 entry path using the accumulators and terminology defined by the old entry path for P8 machines. Not only the "real-mode" and "napping" mentions are out of place for the P9 Radix entry path but also we cannot change them because the timing code is coupled to the structures defined in struct kvm_vcpu_arch. Add a new CONFIG_KVM_BOOK3S_HV_P9_TIMING to enable the timing code for the P9 entry path. For now, just add the new CONFIG and duplicate the structures. A subsequent patch will add the P9 changes. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-4-farosas@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 8 ++++++++ arch/powerpc/kvm/Kconfig | 14 +++++++++++++- arch/powerpc/kvm/book3s_hv.c | 13 +++++++++++-- arch/powerpc/kvm/book3s_hv_p9_entry.c | 2 +- 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 2909a88acd16..a0f44762a069 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -830,11 +830,19 @@ struct kvm_vcpu_arch { #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */ u64 cur_tb_start; /* when it started */ +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ struct kvmhv_tb_accumulator guest_time; /* guest execution */ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ +#else + struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ + struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ + struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ + struct kvmhv_tb_accumulator guest_time; /* guest execution */ + struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ +#endif #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ }; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 73f8277df7d1..191347f44731 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -130,10 +130,22 @@ config KVM_BOOK3S_64_PR config KVM_BOOK3S_HV_EXIT_TIMING bool +config KVM_BOOK3S_HV_P9_TIMING + bool "Detailed timing for the P9 entry point" + select KVM_BOOK3S_HV_EXIT_TIMING + depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS + help + Calculate time taken for each vcpu in various parts of the + code. The total, minimum and maximum times in nanoseconds + together with the number of executions are reported in debugfs in + kvm/vm#/vcpu#/timings. + + If unsure, say N. + config KVM_BOOK3S_HV_P8_TIMING bool "Detailed timing for hypervisor real-mode code (for POWER8)" select KVM_BOOK3S_HV_EXIT_TIMING - depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS + depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS && !KVM_BOOK3S_HV_P9_TIMING help Calculate time taken for each vcpu in the real-mode guest entry, exit, and interrupt handling code, plus time spent in the guest diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index e08fb3124dca..108ee563f72a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2660,11 +2660,19 @@ static struct debugfs_timings_element { const char *name; size_t offset; } timings[] = { +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, +#else + {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, + {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, + {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, + {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, + {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, +#endif }; #define N_TIMINGS (ARRAY_SIZE(timings)) @@ -2783,8 +2791,9 @@ static const struct file_operations debugfs_timings_ops = { /* Create a debugfs directory for the vcpu */ static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, - &debugfs_timings_ops); + if (cpu_has_feature(CPU_FTR_ARCH_300) == IS_ENABLED(CONFIG_KVM_BOOK3S_HV_P9_TIMING)) + debugfs_create_file("timings", 0444, debugfs_dentry, vcpu, + &debugfs_timings_ops); return 0; } diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 7f88be386b27..8d8c0cf39bdf 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -437,7 +437,7 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); -#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; From 2861c827286fb6646f6b6caee418efd99992097c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:53 -0300 Subject: [PATCH 05/10] KVM: PPC: Book3S HV: Expose timing functions to module code The next patch adds new timing points to the P9 entry path, some of which are in the module code, so we need to export the timing functions. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-5-farosas@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.h | 10 ++++++++++ arch/powerpc/kvm/book3s_hv_p9_entry.c | 11 ++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h index 6b7f07d9026b..2f2e59d7d433 100644 --- a/arch/powerpc/kvm/book3s_hv.h +++ b/arch/powerpc/kvm/book3s_hv.h @@ -40,3 +40,13 @@ void switch_pmu_to_guest(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs); void switch_pmu_to_host(struct kvm_vcpu *vcpu, struct p9_host_os_sprs *host_os_sprs); + +#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING +void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next); +#define start_timing(vcpu, next) accumulate_time(vcpu, next) +#define end_timing(vcpu) accumulate_time(vcpu, NULL) +#else +#define accumulate_time(vcpu, next) do {} while (0) +#define start_timing(vcpu, next) do {} while (0) +#define end_timing(vcpu) do {} while (0) +#endif diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 8d8c0cf39bdf..32e078db8da2 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -438,7 +438,7 @@ void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu, EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs); #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING -static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) +void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next) { struct kvmppc_vcore *vc = vcpu->arch.vcore; struct kvmhv_tb_accumulator *curr; @@ -468,14 +468,7 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator smp_wmb(); curr->seqcount = seq + 2; } - -#define start_timing(vcpu, next) __accumulate_time(vcpu, next) -#define end_timing(vcpu) __accumulate_time(vcpu, NULL) -#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next) -#else -#define start_timing(vcpu, next) do {} while (0) -#define end_timing(vcpu) do {} while (0) -#define accumulate_time(vcpu, next) do {} while (0) +EXPORT_SYMBOL_GPL(accumulate_time); #endif static inline u64 mfslbv(unsigned int idx) From b44bb1b7cbbae66ec73868f5fbd57c54f0612d1c Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Wed, 25 May 2022 10:05:54 -0300 Subject: [PATCH 06/10] KVM: PPC: Book3S HV: Provide more detailed timings for P9 entry path Alter the data collection points for the debug timing code in the P9 path to be more in line with what the code does. The points where we accumulate time are now the following: vcpu_entry: From vcpu_run_hv entry until the start of the inner loop; guest_entry: From the start of the inner loop until the guest entry in asm; in_guest: From the guest entry in asm until the return to KVM C code; guest_exit: From the return into KVM C code until the corresponding hypercall/page fault handling or re-entry into the guest; hypercall: Time spent handling hcalls in the kernel (hcalls can go to QEMU, not accounted here); page_fault: Time spent handling page faults; vcpu_exit: vcpu_run_hv exit (almost no code here currently). Like before, these are exposed in debugfs in a file called "timings". There are four values: - number of occurrences of the accumulation point; - total time the vcpu spent in the phase in ns; - shortest time the vcpu spent in the phase in ns; - longest time the vcpu spent in the phase in ns; === Before: rm_entry: 53132 16793518 256 4060 rm_intr: 53132 2125914 22 340 rm_exit: 53132 24108344 374 2180 guest: 53132 40980507996 404 9997650 cede: 0 0 0 0 After: vcpu_entry: 34637 7716108 178 4416 guest_entry: 52414 49365608 324 747542 in_guest: 52411 40828715840 258 9997480 guest_exit: 52410 19681717182 826 102496674 vcpu_exit: 34636 1744462 38 182 hypercall: 45712 22878288 38 1307962 page_fault: 992 111104034 568 168688 With just one instruction (hcall): vcpu_entry: 1 942 942 942 guest_entry: 1 4044 4044 4044 in_guest: 1 1540 1540 1540 guest_exit: 1 3542 3542 3542 vcpu_exit: 1 80 80 80 hypercall: 0 0 0 0 page_fault: 0 0 0 0 === Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220525130554.2614394-6-farosas@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 12 +++++++----- arch/powerpc/kvm/Kconfig | 9 +++++---- arch/powerpc/kvm/book3s_hv.c | 23 ++++++++++++++++++----- arch/powerpc/kvm/book3s_hv_p9_entry.c | 16 +++++----------- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a0f44762a069..eeba679802cf 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -831,11 +831,13 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */ u64 cur_tb_start; /* when it started */ #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING - struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ - struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ - struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ - struct kvmhv_tb_accumulator guest_time; /* guest execution */ - struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ + struct kvmhv_tb_accumulator vcpu_entry; + struct kvmhv_tb_accumulator vcpu_exit; + struct kvmhv_tb_accumulator in_guest; + struct kvmhv_tb_accumulator hcall; + struct kvmhv_tb_accumulator pg_fault; + struct kvmhv_tb_accumulator guest_entry; + struct kvmhv_tb_accumulator guest_exit; #else struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 191347f44731..cedf1e0f50e1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -135,10 +135,11 @@ config KVM_BOOK3S_HV_P9_TIMING select KVM_BOOK3S_HV_EXIT_TIMING depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS help - Calculate time taken for each vcpu in various parts of the - code. The total, minimum and maximum times in nanoseconds - together with the number of executions are reported in debugfs in - kvm/vm#/vcpu#/timings. + Calculate time taken for each vcpu during vcpu entry and + exit, time spent inside the guest and time spent handling + hypercalls and page faults. The total, minimum and maximum + times in nanoseconds together with the number of executions + are reported in debugfs in kvm/vm#/vcpu#/timings. If unsure, say N. diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 108ee563f72a..c68883170a82 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2661,11 +2661,13 @@ static struct debugfs_timings_element { size_t offset; } timings[] = { #ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING - {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, - {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, - {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, - {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, - {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, + {"vcpu_entry", offsetof(struct kvm_vcpu, arch.vcpu_entry)}, + {"guest_entry", offsetof(struct kvm_vcpu, arch.guest_entry)}, + {"in_guest", offsetof(struct kvm_vcpu, arch.in_guest)}, + {"guest_exit", offsetof(struct kvm_vcpu, arch.guest_exit)}, + {"vcpu_exit", offsetof(struct kvm_vcpu, arch.vcpu_exit)}, + {"hypercall", offsetof(struct kvm_vcpu, arch.hcall)}, + {"page_fault", offsetof(struct kvm_vcpu, arch.pg_fault)}, #else {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, @@ -4014,8 +4016,10 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); switch_pmu_to_guest(vcpu, &host_os_sprs); + accumulate_time(vcpu, &vcpu->arch.in_guest); trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs), __pa(&vcpu->arch.regs)); + accumulate_time(vcpu, &vcpu->arch.guest_exit); kvmhv_restore_hv_return_state(vcpu, &hvregs); switch_pmu_to_host(vcpu, &host_os_sprs); vcpu->arch.shregs.msr = vcpu->arch.regs.msr; @@ -4703,6 +4707,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) struct kvm *kvm; unsigned long msr; + start_timing(vcpu, &vcpu->arch.vcpu_entry); + if (!vcpu->arch.sane) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; return -EINVAL; @@ -4768,6 +4774,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; do { + accumulate_time(vcpu, &vcpu->arch.guest_entry); if (cpu_has_feature(CPU_FTR_ARCH_300)) r = kvmhv_run_single_vcpu(vcpu, ~(u64)0, vcpu->arch.vcore->lpcr); @@ -4775,6 +4782,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) r = kvmppc_run_vcpu(vcpu); if (run->exit_reason == KVM_EXIT_PAPR_HCALL) { + accumulate_time(vcpu, &vcpu->arch.hcall); + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) { /* * These should have been caught reflected @@ -4790,6 +4799,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) trace_kvm_hcall_exit(vcpu, r); kvmppc_core_prepare_to_enter(vcpu); } else if (r == RESUME_PAGE_FAULT) { + accumulate_time(vcpu, &vcpu->arch.pg_fault); srcu_idx = srcu_read_lock(&kvm->srcu); r = kvmppc_book3s_hv_page_fault(vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr); @@ -4801,12 +4811,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu) r = kvmppc_xics_rm_complete(vcpu, 0); } } while (is_kvmppc_resume_guest(r)); + accumulate_time(vcpu, &vcpu->arch.vcpu_exit); vcpu->arch.state = KVMPPC_VCPU_NOTREADY; atomic_dec(&kvm->arch.vcpus_running); srr_regs_clobbered(); + end_timing(vcpu); + return r; } diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 32e078db8da2..e740eca45862 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -779,8 +779,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV); WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME)); - start_timing(vcpu, &vcpu->arch.rm_entry); - vcpu->arch.ceded = 0; /* Save MSR for restore, with EE clear. */ @@ -941,13 +939,13 @@ tm_return_to_guest: mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1); - accumulate_time(vcpu, &vcpu->arch.guest_time); - switch_pmu_to_guest(vcpu, &host_os_sprs); - kvmppc_p9_enter_guest(vcpu); - switch_pmu_to_host(vcpu, &host_os_sprs); + accumulate_time(vcpu, &vcpu->arch.in_guest); - accumulate_time(vcpu, &vcpu->arch.rm_intr); + kvmppc_p9_enter_guest(vcpu); + + accumulate_time(vcpu, &vcpu->arch.guest_exit); + switch_pmu_to_host(vcpu, &host_os_sprs); /* XXX: Could get these from r11/12 and paca exsave instead */ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0); @@ -1042,8 +1040,6 @@ tm_return_to_guest: #endif } - accumulate_time(vcpu, &vcpu->arch.rm_exit); - /* Advance host PURR/SPURR by the amount used by guest */ purr = mfspr(SPRN_PURR); spurr = mfspr(SPRN_SPURR); @@ -1150,8 +1146,6 @@ tm_return_to_guest: asm volatile(PPC_CP_ABORT); out: - end_timing(vcpu); - return trap; } EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9); From 0df01238b8aa300cbc736e7ec433d201a76036f3 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Tue, 14 Jun 2022 13:52:04 -0300 Subject: [PATCH 07/10] KVM: PPC: Book3S HV: tracing: Add missing hcall names The kvm_trace_symbol_hcall macro is missing several of the hypercalls defined in hvcall.h. Add the most common ones that are issued during guest lifetime, including the ones that are only used by QEMU and SLOF. Signed-off-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220614165204.549229-1-farosas@linux.ibm.com --- arch/powerpc/include/asm/hvcall.h | 8 ++++++++ arch/powerpc/kvm/trace_hv.h | 21 ++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index d92a20a85395..1d454c70e7c6 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -350,6 +350,14 @@ /* Platform specific hcalls, used by KVM */ #define H_RTAS 0xf000 +/* + * Platform specific hcalls, used by QEMU/SLOF. These are ignored by + * KVM and only kept here so we can identify them during tracing. + */ +#define H_LOGICAL_MEMOP 0xF001 +#define H_CAS 0XF002 +#define H_UPDATE_DT 0XF003 + /* "Platform specific hcalls", provided by PHYP */ #define H_GET_24X7_CATALOG_PAGE 0xF078 #define H_GET_24X7_DATA 0xF07C diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 32e2cb5811cc..8d57c8428531 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -94,6 +94,7 @@ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \ {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \ + {H_GET_CPU_CHARACTERISTICS, "H_GET_CPU_CHARACTERISTICS"}, \ {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \ {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \ {H_POLL_PENDING, "H_POLL_PENDING"}, \ @@ -125,7 +126,25 @@ {H_COP, "H_COP"}, \ {H_GET_MPP_X, "H_GET_MPP_X"}, \ {H_SET_MODE, "H_SET_MODE"}, \ - {H_RTAS, "H_RTAS"} + {H_REGISTER_PROC_TBL, "H_REGISTER_PROC_TBL"}, \ + {H_QUERY_VAS_CAPABILITIES, "H_QUERY_VAS_CAPABILITIES"}, \ + {H_INT_GET_SOURCE_INFO, "H_INT_GET_SOURCE_INFO"}, \ + {H_INT_SET_SOURCE_CONFIG, "H_INT_SET_SOURCE_CONFIG"}, \ + {H_INT_GET_QUEUE_INFO, "H_INT_GET_QUEUE_INFO"}, \ + {H_INT_SET_QUEUE_CONFIG, "H_INT_SET_QUEUE_CONFIG"}, \ + {H_INT_ESB, "H_INT_ESB"}, \ + {H_INT_RESET, "H_INT_RESET"}, \ + {H_RPT_INVALIDATE, "H_RPT_INVALIDATE"}, \ + {H_RTAS, "H_RTAS"}, \ + {H_LOGICAL_MEMOP, "H_LOGICAL_MEMOP"}, \ + {H_CAS, "H_CAS"}, \ + {H_UPDATE_DT, "H_UPDATE_DT"}, \ + {H_GET_PERF_COUNTER_INFO, "H_GET_PERF_COUNTER_INFO"}, \ + {H_SET_PARTITION_TABLE, "H_SET_PARTITION_TABLE"}, \ + {H_ENTER_NESTED, "H_ENTER_NESTED"}, \ + {H_TLB_INVALIDATE, "H_TLB_INVALIDATE"}, \ + {H_COPY_TOFROM_GUEST, "H_COPY_TOFROM_GUEST"} + #define kvm_trace_symbol_kvmret \ {RESUME_GUEST, "RESUME_GUEST"}, \ From f5c847ea19d323974d6f7c7e9fa4858ce0727096 Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Fri, 24 Jun 2022 11:27:12 -0300 Subject: [PATCH 08/10] KVM: PPC: Align pt_regs in kvm_vcpu_arch structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The H_ENTER_NESTED hypercall receives as second parameter the address of a region of memory containing the values for the nested guest privileged registers. We currently use the pt_regs structure contained within kvm_vcpu_arch for that end. Most hypercalls that receive a memory address expect that region to not cross a 4K page boundary. We would want H_ENTER_NESTED to follow the same pattern so this patch ensures the pt_regs structure sits within a page. Note: the pt_regs structure is currently 384 bytes in size, so aligning to 512 is sufficient to ensure it will not cross a 4K page and avoids punching too big a hole in struct kvm_vcpu_arch. Signed-off-by: Fabiano Rosas Signed-off-by: Murilo Opsfelder Araújo Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220624142712.790491-1-farosas@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index eeba679802cf..c2b003550dc9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -523,7 +523,11 @@ struct kvm_vcpu_arch { struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; #endif - struct pt_regs regs; + /* + * This is passed along to the HV via H_ENTER_NESTED. Align to + * prevent it crossing a real 4K page. + */ + struct pt_regs regs __aligned(512); struct thread_fp_state fp; From 4dee21e0f2520f5032b0abce0ecae593a71bd19d Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 28 Jun 2022 18:02:28 +1000 Subject: [PATCH 09/10] KVM: PPC: Do not warn when userspace asked for too big TCE table KVM manages emulated TCE tables for guest LIOBNs by a two level table which maps up to 128TiB with 16MB IOMMU pages (enabled in QEMU by default) and MAX_ORDER=11 (the kernel's default). Note that the last level of the table is allocated when actual TCE is updated. However these tables are created via ioctl() on kvmfd and the userspace can trigger WARN_ON_ONCE_GFP(order >= MAX_ORDER, gfp) in mm/page_alloc.c and flood dmesg. This adds __GFP_NOWARN. Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220628080228.1508847-1-aik@ozlabs.ru --- arch/powerpc/kvm/book3s_64_vio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index d6589c4fe889..40864373ef87 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -307,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, return ret; ret = -ENOMEM; - stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL); + stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN); if (!stt) goto fail_acct; From 81e9685dd41384a39adda823df8b4f6e16ec2898 Mon Sep 17 00:00:00 2001 From: Juerg Haefliger Date: Fri, 20 May 2022 13:54:31 +0200 Subject: [PATCH 10/10] KVM: PPC: Kconfig: Fix indentation The convention for indentation seems to be a single tab. Help text is further indented by an additional two whitespaces. Fix the lines that violate these rules. Signed-off-by: Juerg Haefliger Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20220520115431.147593-1-juergh@canonical.com --- arch/powerpc/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index cedf1e0f50e1..dcb398d5e009 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -166,7 +166,7 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND Old nested HV capable Linux guests have a bug where they don't reflect the PMU in-use status of their L2 guest to the L0 host while the L2 PMU registers are live. This can result in loss - of L2 PMU register state, causing perf to not work correctly in + of L2 PMU register state, causing perf to not work correctly in L2 guests. Selecting this option for the L0 host implements a workaround for