From 69231a6fcb638b7929e9fc88c4fa73a04e6d4e0c Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Fri, 3 May 2024 14:01:26 +0100 Subject: [PATCH 01/18] KVM: arm64: Make kvm_at() take an OP_AT_* To allow using newer instructions that current assemblers don't know about, replace the `at` instruction with the underlying SYS instruction. Signed-off-by: Joey Gouly Cc: Oliver Upton Cc: Catalin Marinas Cc: Will Deacon Reviewed-by: Marc Zyngier Reviewed-by: Anshuman Khandual Acked-by: Will Deacon Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 3 ++- arch/arm64/kvm/hyp/include/hyp/fault.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 2181a11b9d92..25f49f5fc4a6 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -10,6 +10,7 @@ #include #include #include +#include #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) @@ -259,7 +260,7 @@ extern u64 __kvm_get_mdcr_el2(void); asm volatile( \ " mrs %1, spsr_el2\n" \ " mrs %2, elr_el2\n" \ - "1: at "at_op", %3\n" \ + "1: " __msr_s(at_op, "%3") "\n" \ " isb\n" \ " b 9f\n" \ "2: msr spsr_el2, %1\n" \ diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 9e13c1bc2ad5..487c06099d6f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -27,7 +27,7 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) * saved the guest context yet, and we may return early... */ par = read_sysreg_par(); - if (!__kvm_at("s1e1r", far)) + if (!__kvm_at(OP_AT_S1E1R, far)) tmp = read_sysreg_par(); else tmp = SYS_PAR_EL1_F; /* back to the guest */ From 4abc783e4741cd33216e7796e9b2f4973b4bca61 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2024 10:09:18 +0100 Subject: [PATCH 02/18] arm64: Add missing APTable and TCR_ELx.HPD masks Although Linux doesn't make use of hierarchical permissions (TFFT!), KVM needs to know where the various bits related to this feature live in the TCR_ELx registers as well as in the page tables. Add the missing bits. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_arm.h | 1 + arch/arm64/include/asm/pgtable-hwdef.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index d81cc746e0eb..109a85ee6910 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -107,6 +107,7 @@ /* TCR_EL2 Registers bits */ #define TCR_EL2_DS (1UL << 32) #define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) +#define TCR_EL2_HPD (1 << 24) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 1f60aa1bc750..07dfbdb14bab 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -204,6 +204,11 @@ */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) +/* + * Hierarchical permission for Stage-1 tables + */ +#define S1_TABLE_AP (_AT(pmdval_t, 3) << 61) + /* * Highest possible physical address supported. */ @@ -298,6 +303,10 @@ #define TCR_TBI1 (UL(1) << 38) #define TCR_HA (UL(1) << 39) #define TCR_HD (UL(1) << 40) +#define TCR_HPD0_SHIFT 41 +#define TCR_HPD0 (UL(1) << TCR_HPD0_SHIFT) +#define TCR_HPD1_SHIFT 42 +#define TCR_HPD1 (UL(1) << TCR_HPD1_SHIFT) #define TCR_TBID0 (UL(1) << 51) #define TCR_TBID1 (UL(1) << 52) #define TCR_NFD0 (UL(1) << 53) From 6dcd2ac7ea7c5b20b416ee09d8d5d2ec89866ef8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2024 10:10:19 +0100 Subject: [PATCH 03/18] arm64: Add PAR_EL1 field description As KVM is about to grow a full emulation for the AT instructions, add the layout of the PAR_EL1 register in its non-D128 configuration. Note that the constants are a bit ugly, as the register has two layouts, based on the state of the F bit. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 4a9ea103817e..d9d5e07f768d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -325,7 +325,25 @@ #define SYS_PAR_EL1 sys_reg(3, 0, 7, 4, 0) #define SYS_PAR_EL1_F BIT(0) +/* When PAR_EL1.F == 1 */ #define SYS_PAR_EL1_FST GENMASK(6, 1) +#define SYS_PAR_EL1_PTW BIT(8) +#define SYS_PAR_EL1_S BIT(9) +#define SYS_PAR_EL1_AssuredOnly BIT(12) +#define SYS_PAR_EL1_TopLevel BIT(13) +#define SYS_PAR_EL1_Overlay BIT(14) +#define SYS_PAR_EL1_DirtyBit BIT(15) +#define SYS_PAR_EL1_F1_IMPDEF GENMASK_ULL(63, 48) +#define SYS_PAR_EL1_F1_RES0 (BIT(7) | BIT(10) | GENMASK_ULL(47, 16)) +#define SYS_PAR_EL1_RES1 BIT(11) +/* When PAR_EL1.F == 0 */ +#define SYS_PAR_EL1_SH GENMASK_ULL(8, 7) +#define SYS_PAR_EL1_NS BIT(9) +#define SYS_PAR_EL1_F0_IMPDEF BIT(10) +#define SYS_PAR_EL1_NSE BIT(11) +#define SYS_PAR_EL1_PA GENMASK_ULL(51, 12) +#define SYS_PAR_EL1_ATTR GENMASK_ULL(63, 56) +#define SYS_PAR_EL1_F0_RES0 (GENMASK_ULL(6, 1) | GENMASK_ULL(55, 52)) /*** Statistical Profiling Extension ***/ #define PMSEVFR_EL1_RES0_IMP \ From b229b46b0bf7828bef5f88c91708776869b751ac Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Jul 2024 13:09:29 +0100 Subject: [PATCH 04/18] arm64: Add system register encoding for PSTATE.PAN Although we already have the primitives to set PSTATE.PAN with an immediate, we don't have a way to read the current state nor set it ot an arbitrary value (i.e. we can generally save/restore it). Thankfully, all that is missing for this is the definition for the PAN pseudo system register, here named SYS_PSTATE_PAN. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d9d5e07f768d..a2787091d5a0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -109,6 +109,9 @@ #define set_pstate_ssbs(x) asm volatile(SET_PSTATE_SSBS(x)) #define set_pstate_dit(x) asm volatile(SET_PSTATE_DIT(x)) +/* Register-based PAN access, for save/restore purposes */ +#define SYS_PSTATE_PAN sys_reg(3, 0, 4, 2, 3) + #define __SYS_BARRIER_INSN(CRm, op2, Rt) \ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f)) From 5fddf9abc31a57e2cc35287998994cf4a684fada Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 31 Jul 2024 10:54:04 +0100 Subject: [PATCH 05/18] arm64: Add ESR_ELx_FSC_ADDRSZ_L() helper Although we have helpers that encode the level of a given fault type, the Address Size fault type is missing it. While we're at it, fix the bracketting for ESR_ELx_FSC_ACCESS_L() and ESR_ELx_FSC_PERM_L(). Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/esr.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 56c148890daf..d79308c23ddb 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -122,8 +122,8 @@ #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) /* Status codes for individual page table levels */ -#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + n) -#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + n) +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n)) #define ESR_ELx_FSC_FAULT_nL (0x2C) #define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \ @@ -161,6 +161,7 @@ /* ISS field definitions for exceptions taken in to Hyp */ #define ESR_ELx_FSC_ADDRSZ (0x00) +#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) From 4155539bc5baab514ac71285a1a13fcf148f9cf1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 10 Aug 2024 18:42:41 +0100 Subject: [PATCH 06/18] KVM: arm64: nv: Enforce S2 alignment when contiguous bit is set Despite KVM not using the contiguous bit for anything related to TLBs, the spec does require that the alignment defined by the contiguous bit for the page size and the level is enforced. Add the required checks to offset the point where PA and VA merge. Fixes: 61e30b9eef7f ("KVM: arm64: nv: Implement nested Stage-2 page table walk logic") Reported-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 22 ++++++++++++++++++++++ arch/arm64/kvm/nested.c | 7 ++----- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 5b06c31035a2..6e163501f13e 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -205,4 +205,26 @@ static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans) return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level); } +/* Adjust alignment for the contiguous bit as per StageOA() */ +#define contiguous_bit_shift(d, wi, l) \ + ({ \ + u8 shift = 0; \ + \ + if ((d) & PTE_CONT) { \ + switch (BIT((wi)->pgshift)) { \ + case SZ_4K: \ + shift = 4; \ + break; \ + case SZ_16K: \ + shift = (l) == 2 ? 5 : 7; \ + break; \ + case SZ_64K: \ + shift = 5; \ + break; \ + } \ + } \ + \ + shift; \ + }) + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index de789e0f1ae9..49a7832a3fb1 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -282,11 +282,6 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, return 1; } - /* - * We don't use the contiguous bit in the stage-2 ptes, so skip check - * for misprogramming of the contiguous bit. - */ - if (check_output_size(wi, desc)) { out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); out->upper_attr = desc; @@ -299,6 +294,8 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, return 1; } + addr_bottom += contiguous_bit_shift(desc, wi, level); + /* Calculate and return the result */ paddr = (desc & GENMASK_ULL(47, addr_bottom)) | (ipa & GENMASK_ULL(addr_bottom - 1, 0)); From 0a0f25b71ca544388717f8bf4a54ba324e234e7a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 21 Jun 2024 14:59:36 +0100 Subject: [PATCH 07/18] KVM: arm64: nv: Turn upper_attr for S2 walk into the full descriptor The upper_attr attribute has been badly named, as it most of the time carries the full "last walked descriptor". Rename it to "desc" and make ti contain the full 64bit descriptor. This will be used by the S1 PTW. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 4 ++-- arch/arm64/kvm/nested.c | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 6e163501f13e..43e531c67311 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -85,7 +85,7 @@ struct kvm_s2_trans { bool readable; int level; u32 esr; - u64 upper_attr; + u64 desc; }; static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans) @@ -115,7 +115,7 @@ static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans) static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans) { - return !(trans->upper_attr & BIT(54)); + return !(trans->desc & BIT(54)); } extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa, diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 49a7832a3fb1..234d0f6006c6 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -256,7 +256,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, /* Check for valid descriptor at this point */ if (!(desc & 1) || ((desc & 3) == 1 && level == 3)) { out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); - out->upper_attr = desc; + out->desc = desc; return 1; } @@ -266,7 +266,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, if (check_output_size(wi, desc)) { out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); - out->upper_attr = desc; + out->desc = desc; return 1; } @@ -278,19 +278,19 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, if (level < first_block_level) { out->esr = compute_fsc(level, ESR_ELx_FSC_FAULT); - out->upper_attr = desc; + out->desc = desc; return 1; } if (check_output_size(wi, desc)) { out->esr = compute_fsc(level, ESR_ELx_FSC_ADDRSZ); - out->upper_attr = desc; + out->desc = desc; return 1; } if (!(desc & BIT(10))) { out->esr = compute_fsc(level, ESR_ELx_FSC_ACCESS); - out->upper_attr = desc; + out->desc = desc; return 1; } @@ -304,7 +304,7 @@ static int walk_nested_s2_pgd(phys_addr_t ipa, out->readable = desc & (0b01 << 6); out->writable = desc & (0b10 << 6); out->level = level; - out->upper_attr = desc & GENMASK_ULL(63, 52); + out->desc = desc; return 0; } From 90659853febcf63ceb71529b247d518df3c2a76c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Jun 2024 08:16:44 +0100 Subject: [PATCH 08/18] KVM: arm64: nv: Honor absence of FEAT_PAN2 If our guest has been configured without PAN2, make sure that AT S1E1{R,W}P will generate an UNDEF. Reviewed-by: Anshuman Khandual Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c90324060436..e7e5e0df119e 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -4600,6 +4600,10 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu) HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS); + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2)) + kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP | + HFGITR_EL2_ATS1E1WP); + if (!kvm_has_feat(kvm, ID_AA64MMFR3_EL1, S1PIE, IMP)) kvm->arch.fgu[HFGxTR_GROUP] |= (HFGxTR_EL2_nPIRE0_EL1 | HFGxTR_EL2_nPIR_EL1); From 477e89cabb1428d5989430d57828347f5de2be9c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Jun 2024 08:29:20 +0100 Subject: [PATCH 09/18] KVM: arm64: nv: Add basic emulation of AT S1E{0,1}{R,W} Emulating AT instructions is one the tasks devolved to the host hypervisor when NV is on. Here, we take the basic approach of emulating AT S1E{0,1}{R,W} using the AT instructions themselves. While this mostly work, it doesn't *always* work: - S1 page tables can be swapped out - shadow S2 can be incomplete and not contain mappings for the S1 page tables We are not trying to handle these case here, and defer it to a later patch. Suitable comments indicate where we are in dire need of better handling. Co-developed-by: Jintack Lim Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/kvm/Makefile | 2 +- arch/arm64/kvm/at.c | 140 +++++++++++++++++++++++++++++++ 3 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/at.c diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 25f49f5fc4a6..9b6c9f4f4d88 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -236,6 +236,7 @@ extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); extern void __kvm_timer_set_cntvoff(u64 cntvoff); +extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a6497228c5a8..8a3ae76b4da2 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ vgic-sys-reg-v3.o fpsimd.o pkvm.o \ - arch_timer.o trng.o vmid.o emulate-nested.o nested.o \ + arch_timer.o trng.o vmid.o emulate-nested.o nested.o at.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c new file mode 100644 index 000000000000..da378ad834cd --- /dev/null +++ b/arch/arm64/kvm/at.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 - Linaro Ltd + * Author: Jintack Lim + */ + +#include +#include + +struct mmu_config { + u64 ttbr0; + u64 ttbr1; + u64 tcr; + u64 mair; + u64 sctlr; + u64 vttbr; + u64 vtcr; + u64 hcr; +}; + +static void __mmu_config_save(struct mmu_config *config) +{ + config->ttbr0 = read_sysreg_el1(SYS_TTBR0); + config->ttbr1 = read_sysreg_el1(SYS_TTBR1); + config->tcr = read_sysreg_el1(SYS_TCR); + config->mair = read_sysreg_el1(SYS_MAIR); + config->sctlr = read_sysreg_el1(SYS_SCTLR); + config->vttbr = read_sysreg(vttbr_el2); + config->vtcr = read_sysreg(vtcr_el2); + config->hcr = read_sysreg(hcr_el2); +} + +static void __mmu_config_restore(struct mmu_config *config) +{ + write_sysreg(config->hcr, hcr_el2); + + /* + * ARM errata 1165522 and 1530923 require TGE to be 1 before + * we update the guest state. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); + + write_sysreg_el1(config->ttbr0, SYS_TTBR0); + write_sysreg_el1(config->ttbr1, SYS_TTBR1); + write_sysreg_el1(config->tcr, SYS_TCR); + write_sysreg_el1(config->mair, SYS_MAIR); + write_sysreg_el1(config->sctlr, SYS_SCTLR); + write_sysreg(config->vttbr, vttbr_el2); + write_sysreg(config->vtcr, vtcr_el2); +} + +/* + * Return the PAR_EL1 value as the result of a valid translation. + * + * If the translation is unsuccessful, the value may only contain + * PAR_EL1.F, and cannot be taken at face value. It isn't an + * indication of the translation having failed, only that the fast + * path did not succeed, *unless* it indicates a S1 permission fault. + */ +static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + struct mmu_config config; + struct kvm_s2_mmu *mmu; + bool fail; + u64 par; + + par = SYS_PAR_EL1_F; + + /* + * We've trapped, so everything is live on the CPU. As we will + * be switching contexts behind everybody's back, disable + * interrupts while holding the mmu lock. + */ + guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock); + + /* + * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already + * the right one (as we trapped from vEL2). If not, save the + * full MMU context. + */ + if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) + goto skip_mmu_switch; + + /* + * Obtaining the S2 MMU for a L2 is horribly racy, and we may not + * find it (recycled by another vcpu, for example). When this + * happens, admit defeat immediately and use the SW (slow) path. + */ + mmu = lookup_s2_mmu(vcpu); + if (!mmu) + return par; + + __mmu_config_save(&config); + + write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR); + write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR); + __load_stage2(mmu, mmu->arch); + +skip_mmu_switch: + /* Clear TGE, enable S2 translation, we're rolling */ + write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2); + isb(); + + switch (op) { + case OP_AT_S1E1R: + fail = __kvm_at(OP_AT_S1E1R, vaddr); + break; + case OP_AT_S1E1W: + fail = __kvm_at(OP_AT_S1E1W, vaddr); + break; + case OP_AT_S1E0R: + fail = __kvm_at(OP_AT_S1E0R, vaddr); + break; + case OP_AT_S1E0W: + fail = __kvm_at(OP_AT_S1E0W, vaddr); + break; + default: + WARN_ON_ONCE(1); + fail = true; + break; + } + + if (!fail) + par = read_sysreg_par(); + + if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) + __mmu_config_restore(&config); + + return par; +} + +void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); + + vcpu_write_sys_reg(vcpu, par, PAR_EL1); +} From be0135bde1df5e80cffacd2ed6f952e6d38d6f71 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 14 Jul 2024 10:40:43 +0100 Subject: [PATCH 10/18] KVM: arm64: nv: Add basic emulation of AT S1E1{R,W}P Building on top of our primitive AT S1E{0,1}{R,W} emulation, add minimal support for the FEAT_PAN2 instructions, momentary context-switching PSTATE.PAN so that it takes effect in the context of the guest. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index da378ad834cd..92df948350e1 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -49,6 +49,28 @@ static void __mmu_config_restore(struct mmu_config *config) write_sysreg(config->vtcr, vtcr_el2); } +static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + u64 host_pan; + bool fail; + + host_pan = read_sysreg_s(SYS_PSTATE_PAN); + write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN); + + switch (op) { + case OP_AT_S1E1RP: + fail = __kvm_at(OP_AT_S1E1RP, vaddr); + break; + case OP_AT_S1E1WP: + fail = __kvm_at(OP_AT_S1E1WP, vaddr); + break; + } + + write_sysreg_s(host_pan, SYS_PSTATE_PAN); + + return fail; +} + /* * Return the PAR_EL1 value as the result of a valid translation. * @@ -105,6 +127,10 @@ skip_mmu_switch: isb(); switch (op) { + case OP_AT_S1E1RP: + case OP_AT_S1E1WP: + fail = at_s1e1p_fast(vcpu, op, vaddr); + break; case OP_AT_S1E1R: fail = __kvm_at(OP_AT_S1E1R, vaddr); break; From e794049b9acbd6500b77b9ce92a95101091b52d3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Jun 2024 08:43:35 +0100 Subject: [PATCH 11/18] KVM: arm64: nv: Add basic emulation of AT S1E2{R,W} Similar to our AT S1E{0,1} emulation, we implement the AT S1E2 handling. This emulation of course suffers from the same problems, but is somehow simpler due to the lack of PAN2 and the fact that we are guaranteed to execute it from the correct context. Co-developed-by: Jintack Lim Signed-off-by: Jintack Lim Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/kvm/at.c | 51 ++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 9b6c9f4f4d88..6ec062296976 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -237,6 +237,7 @@ extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 92df948350e1..34736c1fe398 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -164,3 +164,54 @@ void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) vcpu_write_sys_reg(vcpu, par, PAR_EL1); } + +void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + u64 par; + + /* + * We've trapped, so everything is live on the CPU. As we will be + * switching context behind everybody's back, disable interrupts... + */ + scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) { + struct kvm_s2_mmu *mmu; + u64 val, hcr; + bool fail; + + mmu = &vcpu->kvm->arch.mmu; + + val = hcr = read_sysreg(hcr_el2); + val &= ~HCR_TGE; + val |= HCR_VM; + + if (!vcpu_el2_e2h_is_set(vcpu)) + val |= HCR_NV | HCR_NV1; + + write_sysreg(val, hcr_el2); + isb(); + + par = SYS_PAR_EL1_F; + + switch (op) { + case OP_AT_S1E2R: + fail = __kvm_at(OP_AT_S1E1R, vaddr); + break; + case OP_AT_S1E2W: + fail = __kvm_at(OP_AT_S1E1W, vaddr); + break; + default: + WARN_ON_ONCE(1); + fail = true; + } + + isb(); + + if (!fail) + par = read_sysreg_par(); + + write_sysreg(hcr, hcr_el2); + isb(); + } + + vcpu_write_sys_reg(vcpu, par, PAR_EL1); +} From be04cebf3e78874627dc1042991d5d504464a5cc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Jun 2024 08:44:52 +0100 Subject: [PATCH 12/18] KVM: arm64: nv: Add emulation of AT S12E{0,1}{R,W} On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk. However, there is a great deal of complexity coming from combining the S1 and S2 attributes to report something consistent in PAR_EL1. This is an absolute mine field, and I have a splitting headache. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/kvm/at.c | 253 +++++++++++++++++++++++++++++++ 2 files changed, 254 insertions(+) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6ec062296976..b36a3b6cc011 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -238,6 +238,7 @@ extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 34736c1fe398..9865d29b3149 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -71,6 +71,200 @@ static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) return fail; } +#define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic) +#define MEMATTR_NC 0b0100 +#define MEMATTR_Wt 0b1000 +#define MEMATTR_Wb 0b1100 +#define MEMATTR_WbRaWa 0b1111 + +#define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0) + +static u8 s2_memattr_to_attr(u8 memattr) +{ + memattr &= 0b1111; + + switch (memattr) { + case 0b0000: + case 0b0001: + case 0b0010: + case 0b0011: + return memattr << 2; + case 0b0100: + return MEMATTR(Wb, Wb); + case 0b0101: + return MEMATTR(NC, NC); + case 0b0110: + return MEMATTR(Wt, NC); + case 0b0111: + return MEMATTR(Wb, NC); + case 0b1000: + /* Reserved, assume NC */ + return MEMATTR(NC, NC); + case 0b1001: + return MEMATTR(NC, Wt); + case 0b1010: + return MEMATTR(Wt, Wt); + case 0b1011: + return MEMATTR(Wb, Wt); + case 0b1100: + /* Reserved, assume NC */ + return MEMATTR(NC, NC); + case 0b1101: + return MEMATTR(NC, Wb); + case 0b1110: + return MEMATTR(Wt, Wb); + case 0b1111: + return MEMATTR(Wb, Wb); + default: + unreachable(); + } +} + +static u8 combine_s1_s2_attr(u8 s1, u8 s2) +{ + bool transient; + u8 final = 0; + + /* Upgrade transient s1 to non-transient to simplify things */ + switch (s1) { + case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */ + transient = true; + s1 = MEMATTR_Wt | (s1 & GENMASK(1,0)); + break; + case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */ + transient = true; + s1 = MEMATTR_Wb | (s1 & GENMASK(1,0)); + break; + default: + transient = false; + } + + /* S2CombineS1AttrHints() */ + if ((s1 & GENMASK(3, 2)) == MEMATTR_NC || + (s2 & GENMASK(3, 2)) == MEMATTR_NC) + final = MEMATTR_NC; + else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt || + (s2 & GENMASK(3, 2)) == MEMATTR_Wt) + final = MEMATTR_Wt; + else + final = MEMATTR_Wb; + + if (final != MEMATTR_NC) { + /* Inherit RaWa hints form S1 */ + if (transient) { + switch (s1 & GENMASK(3, 2)) { + case MEMATTR_Wt: + final = 0; + break; + case MEMATTR_Wb: + final = MEMATTR_NC; + break; + } + } + + final |= s1 & GENMASK(1, 0); + } + + return final; +} + +#define ATTR_NSH 0b00 +#define ATTR_RSV 0b01 +#define ATTR_OSH 0b10 +#define ATTR_ISH 0b11 + +static u8 compute_sh(u8 attr, u64 desc) +{ + u8 sh; + + /* Any form of device, as well as NC has SH[1:0]=0b10 */ + if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) + return ATTR_OSH; + + sh = FIELD_GET(PTE_SHARED, desc); + if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ + sh = ATTR_NSH; + + return sh; +} + +static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, + struct kvm_s2_trans *tr) +{ + u8 s1_parattr, s2_memattr, final_attr; + u64 par; + + /* If S2 has failed to translate, report the damage */ + if (tr->esr) { + par = SYS_PAR_EL1_RES1; + par |= SYS_PAR_EL1_F; + par |= SYS_PAR_EL1_S; + par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr); + return par; + } + + s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par); + s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc); + + if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) { + if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP)) + s2_memattr &= ~BIT(3); + + /* Combination of R_VRJSW and R_RHWZM */ + switch (s2_memattr) { + case 0b0101: + if (MEMATTR_IS_DEVICE(s1_parattr)) + final_attr = s1_parattr; + else + final_attr = MEMATTR(NC, NC); + break; + case 0b0110: + case 0b1110: + final_attr = MEMATTR(WbRaWa, WbRaWa); + break; + case 0b0111: + case 0b1111: + /* Preserve S1 attribute */ + final_attr = s1_parattr; + break; + case 0b0100: + case 0b1100: + case 0b1101: + /* Reserved, do something non-silly */ + final_attr = s1_parattr; + break; + default: + /* MemAttr[2]=0, Device from S2 */ + final_attr = s2_memattr & GENMASK(1,0) << 2; + } + } else { + /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ + u8 s2_parattr = s2_memattr_to_attr(s2_memattr); + + if (MEMATTR_IS_DEVICE(s1_parattr) || + MEMATTR_IS_DEVICE(s2_parattr)) { + final_attr = min(s1_parattr, s2_parattr); + } else { + /* At this stage, this is memory vs memory */ + final_attr = combine_s1_s2_attr(s1_parattr & 0xf, + s2_parattr & 0xf); + final_attr |= combine_s1_s2_attr(s1_parattr >> 4, + s2_parattr >> 4) << 4; + } + } + + if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) && + !MEMATTR_IS_DEVICE(final_attr)) + final_attr = MEMATTR(NC, NC); + + par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); + par |= tr->output & GENMASK(47, 12); + par |= FIELD_PREP(SYS_PAR_EL1_SH, + compute_sh(final_attr, tr->desc)); + + return par; +} + /* * Return the PAR_EL1 value as the result of a valid translation. * @@ -215,3 +409,62 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) vcpu_write_sys_reg(vcpu, par, PAR_EL1); } + +void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + struct kvm_s2_trans out = {}; + u64 ipa, par; + bool write; + int ret; + + /* Do the stage-1 translation */ + switch (op) { + case OP_AT_S12E1R: + op = OP_AT_S1E1R; + write = false; + break; + case OP_AT_S12E1W: + op = OP_AT_S1E1W; + write = true; + break; + case OP_AT_S12E0R: + op = OP_AT_S1E0R; + write = false; + break; + case OP_AT_S12E0W: + op = OP_AT_S1E0W; + write = true; + break; + default: + WARN_ON_ONCE(1); + return; + } + + __kvm_at_s1e01(vcpu, op, vaddr); + par = vcpu_read_sys_reg(vcpu, PAR_EL1); + if (par & SYS_PAR_EL1_F) + return; + + /* + * If we only have a single stage of translation (E2H=0 or + * TGE=1), exit early. Same thing if {VM,DC}=={0,0}. + */ + if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) || + !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) + return; + + /* Do the stage-2 translation */ + ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0)); + out.esr = 0; + ret = kvm_walk_nested_s2(vcpu, ipa, &out); + if (ret < 0) + return; + + /* Check the access permission */ + if (!out.esr && + ((!write && !out.readable) || (write && !out.writable))) + out.esr = ESR_ELx_FSC_PERM | (out.level & 0x3); + + par = compute_par_s12(vcpu, par, &out); + vcpu_write_sys_reg(vcpu, par, PAR_EL1); +} From 97634dac1974d28e5ffc067d257f0b0f79b5ed2e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2024 10:12:15 +0100 Subject: [PATCH 13/18] KVM: arm64: nv: Make ps_to_output_size() generally available Make this helper visible to at.c, we are going to need it. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_nested.h | 14 ++++++++++++++ arch/arm64/kvm/nested.c | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h index 43e531c67311..e8bc6d67aba2 100644 --- a/arch/arm64/include/asm/kvm_nested.h +++ b/arch/arm64/include/asm/kvm_nested.h @@ -227,4 +227,18 @@ static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans) shift; \ }) +static inline unsigned int ps_to_output_size(unsigned int ps) +{ + switch (ps) { + case 0: return 32; + case 1: return 36; + case 2: return 40; + case 3: return 42; + case 4: return 44; + case 5: + default: + return 48; + } +} + #endif /* __ARM64_KVM_NESTED_H */ diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 234d0f6006c6..9c8573493d80 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -103,20 +103,6 @@ struct s2_walk_info { bool be; }; -static unsigned int ps_to_output_size(unsigned int ps) -{ - switch (ps) { - case 0: return 32; - case 1: return 36; - case 2: return 40; - case 3: return 42; - case 4: return 44; - case 5: - default: - return 48; - } -} - static u32 compute_fsc(int level, u32 fsc) { return fsc | (level & 0x3); From d6a01a2dc760c8350fa182a6afd69fabab131f73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 18 Jun 2024 10:40:02 +0100 Subject: [PATCH 14/18] KVM: arm64: nv: Add SW walker for AT S1 emulation In order to plug the brokenness of our current AT implementation, we need a SW walker that is going to... err.. walk the S1 tables and tell us what it finds. Of course, it builds on top of our S2 walker, and share similar concepts. The beauty of it is that since it uses kvm_read_guest(), it is able to bring back pages that have been otherwise evicted. This is then plugged in the two AT S1 emulation functions as a "slow path" fallback. I'm not sure it is that slow, but hey. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 610 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 608 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 9865d29b3149..e037eb73738a 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -4,9 +4,408 @@ * Author: Jintack Lim */ +#include + +#include #include #include +enum trans_regime { + TR_EL10, + TR_EL20, + TR_EL2, +}; + +struct s1_walk_info { + u64 baddr; + enum trans_regime regime; + unsigned int max_oa_bits; + unsigned int pgshift; + unsigned int txsz; + int sl; + bool hpd; + bool be; + bool s2; +}; + +struct s1_walk_result { + union { + struct { + u64 desc; + u64 pa; + s8 level; + u8 APTable; + bool UXNTable; + bool PXNTable; + }; + struct { + u8 fst; + bool ptw; + bool s2; + }; + }; + bool failed; +}; + +static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2) +{ + wr->fst = fst; + wr->ptw = ptw; + wr->s2 = s2; + wr->failed = true; +} + +#define S1_MMU_DISABLED (-127) + +static int get_ia_size(struct s1_walk_info *wi) +{ + return 64 - wi->txsz; +} + +/* Return true if the IPA is out of the OA range */ +static bool check_output_size(u64 ipa, struct s1_walk_info *wi) +{ + return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); +} + +/* Return the translation regime that applies to an AT instruction */ +static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) +{ + /* + * We only get here from guest EL2, so the translation + * regime AT applies to is solely defined by {E2H,TGE}. + */ + switch (op) { + case OP_AT_S1E2R: + case OP_AT_S1E2W: + return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; + break; + default: + return (vcpu_el2_e2h_is_set(vcpu) && + vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; + } +} + +static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, + struct s1_walk_result *wr, u64 va) +{ + u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr; + unsigned int stride, x; + bool va55, tbi, lva, as_el0; + + hcr = __vcpu_sys_reg(vcpu, HCR_EL2); + + wi->regime = compute_translation_regime(vcpu, op); + as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W); + + va55 = va & BIT(55); + + if (wi->regime == TR_EL2 && va55) + goto addrsz; + + wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); + + switch (wi->regime) { + case TR_EL10: + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + tcr = vcpu_read_sys_reg(vcpu, TCR_EL1); + ttbr = (va55 ? + vcpu_read_sys_reg(vcpu, TTBR1_EL1) : + vcpu_read_sys_reg(vcpu, TTBR0_EL1)); + break; + case TR_EL2: + case TR_EL20: + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); + tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); + ttbr = (va55 ? + vcpu_read_sys_reg(vcpu, TTBR1_EL2) : + vcpu_read_sys_reg(vcpu, TTBR0_EL2)); + break; + default: + BUG(); + } + + tbi = (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_TBI, tcr) : + (va55 ? + FIELD_GET(TCR_TBI1, tcr) : + FIELD_GET(TCR_TBI0, tcr))); + + if (!tbi && (u64)sign_extend64(va, 55) != va) + goto addrsz; + + va = (u64)sign_extend64(va, 55); + + /* Let's put the MMU disabled case aside immediately */ + switch (wi->regime) { + case TR_EL10: + /* + * If dealing with the EL1&0 translation regime, 3 things + * can disable the S1 translation: + * + * - HCR_EL2.DC = 1 + * - HCR_EL2.{E2H,TGE} = {0,1} + * - SCTLR_EL1.M = 0 + * + * The TGE part is interesting. If we have decided that this + * is EL1&0, then it means that either {E2H,TGE} == {1,0} or + * {0,x}, and we only need to test for TGE == 1. + */ + if (hcr & (HCR_DC | HCR_TGE)) { + wr->level = S1_MMU_DISABLED; + break; + } + fallthrough; + case TR_EL2: + case TR_EL20: + if (!(sctlr & SCTLR_ELx_M)) + wr->level = S1_MMU_DISABLED; + break; + } + + if (wr->level == S1_MMU_DISABLED) { + if (va >= BIT(kvm_get_pa_bits(vcpu->kvm))) + goto addrsz; + + wr->pa = va; + return 0; + } + + wi->be = sctlr & SCTLR_ELx_EE; + + wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP); + wi->hpd &= (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_HPD, tcr) : + (va55 ? + FIELD_GET(TCR_HPD1, tcr) : + FIELD_GET(TCR_HPD0, tcr))); + + /* Someone was silly enough to encode TG0/TG1 differently */ + if (va55) { + wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG1_MASK, tcr); + + switch (tg << TCR_TG1_SHIFT) { + case TCR_TG1_4K: + wi->pgshift = 12; break; + case TCR_TG1_16K: + wi->pgshift = 14; break; + case TCR_TG1_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } else { + wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); + tg = FIELD_GET(TCR_TG0_MASK, tcr); + + switch (tg << TCR_TG0_SHIFT) { + case TCR_TG0_4K: + wi->pgshift = 12; break; + case TCR_TG0_16K: + wi->pgshift = 14; break; + case TCR_TG0_64K: + default: /* IMPDEF: treat any other value as 64k */ + wi->pgshift = 16; break; + } + } + + /* R_PLCGL, R_YXNYW */ + if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) { + if (wi->txsz > 39) + goto transfault_l0; + } else { + if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47)) + goto transfault_l0; + } + + /* R_GTJBY, R_SXWGM */ + switch (BIT(wi->pgshift)) { + case SZ_4K: + lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT); + lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); + break; + case SZ_16K: + lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT); + lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); + break; + case SZ_64K: + lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52); + break; + } + + if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16)) + goto transfault_l0; + + ia_bits = get_ia_size(wi); + + /* R_YYVYV, I_THCZK */ + if ((!va55 && va > GENMASK(ia_bits - 1, 0)) || + (va55 && va < GENMASK(63, ia_bits))) + goto transfault_l0; + + /* I_ZFSYQ */ + if (wi->regime != TR_EL2 && + (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK))) + goto transfault_l0; + + /* R_BNDVG and following statements */ + if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) && + as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0))) + goto transfault_l0; + + /* AArch64.S1StartLevel() */ + stride = wi->pgshift - 3; + wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); + + ps = (wi->regime == TR_EL2 ? + FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); + + wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps)); + + /* Compute minimal alignment */ + x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); + + wi->baddr = ttbr & TTBRx_EL1_BADDR; + + /* R_VPBBF */ + if (check_output_size(wi->baddr, wi)) + goto addrsz; + + wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x); + + return 0; + +addrsz: /* Address Size Fault level 0 */ + fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false); + return -EFAULT; + +transfault_l0: /* Translation Fault level 0 */ + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false); + return -EFAULT; +} + +static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, + struct s1_walk_result *wr, u64 va) +{ + u64 va_top, va_bottom, baddr, desc; + int level, stride, ret; + + level = wi->sl; + stride = wi->pgshift - 3; + baddr = wi->baddr; + + va_top = get_ia_size(wi) - 1; + + while (1) { + u64 index, ipa; + + va_bottom = (3 - level) * stride + wi->pgshift; + index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3); + + ipa = baddr | index; + + if (wi->s2) { + struct kvm_s2_trans s2_trans = {}; + + ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans); + if (ret) { + fail_s1_walk(wr, + (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level, + true, true); + return ret; + } + + if (!kvm_s2_trans_readable(&s2_trans)) { + fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), + true, true); + + return -EPERM; + } + + ipa = kvm_s2_trans_output(&s2_trans); + } + + ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); + if (ret) { + fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), + true, false); + return ret; + } + + if (wi->be) + desc = be64_to_cpu((__force __be64)desc); + else + desc = le64_to_cpu((__force __le64)desc); + + /* Invalid descriptor */ + if (!(desc & BIT(0))) + goto transfault; + + /* Block mapping, check validity down the line */ + if (!(desc & BIT(1))) + break; + + /* Page mapping */ + if (level == 3) + break; + + /* Table handling */ + if (!wi->hpd) { + wr->APTable |= FIELD_GET(S1_TABLE_AP, desc); + wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc); + wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc); + } + + baddr = desc & GENMASK_ULL(47, wi->pgshift); + + /* Check for out-of-range OA */ + if (check_output_size(baddr, wi)) + goto addrsz; + + /* Prepare for next round */ + va_top = va_bottom - 1; + level++; + } + + /* Block mapping, check the validity of the level */ + if (!(desc & BIT(1))) { + bool valid_block = false; + + switch (BIT(wi->pgshift)) { + case SZ_4K: + valid_block = level == 1 || level == 2; + break; + case SZ_16K: + case SZ_64K: + valid_block = level == 2; + break; + } + + if (!valid_block) + goto transfault; + } + + if (check_output_size(desc & GENMASK(47, va_bottom), wi)) + goto addrsz; + + va_bottom += contiguous_bit_shift(desc, wi, level); + + wr->failed = false; + wr->level = level; + wr->desc = desc; + wr->pa = desc & GENMASK(47, va_bottom); + wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); + + return 0; + +addrsz: + fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false); + return -EINVAL; +transfault: + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false); + return -ENOENT; +} + struct mmu_config { u64 ttbr0; u64 ttbr1; @@ -188,6 +587,16 @@ static u8 compute_sh(u8 attr, u64 desc) return sh; } +static u8 combine_sh(u8 s1_sh, u8 s2_sh) +{ + if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) + return ATTR_OSH; + if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH) + return ATTR_ISH; + + return ATTR_NSH; +} + static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, struct kvm_s2_trans *tr) { @@ -260,11 +669,185 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); par |= tr->output & GENMASK(47, 12); par |= FIELD_PREP(SYS_PAR_EL1_SH, - compute_sh(final_attr, tr->desc)); + combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), + compute_sh(final_attr, tr->desc))); return par; } +static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, + enum trans_regime regime) +{ + u64 par; + + if (wr->failed) { + par = SYS_PAR_EL1_RES1; + par |= SYS_PAR_EL1_F; + par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst); + par |= wr->ptw ? SYS_PAR_EL1_PTW : 0; + par |= wr->s2 ? SYS_PAR_EL1_S : 0; + } else if (wr->level == S1_MMU_DISABLED) { + /* MMU off or HCR_EL2.DC == 1 */ + par = SYS_PAR_EL1_NSE; + par |= wr->pa & GENMASK_ULL(47, 12); + + if (regime == TR_EL10 && + (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { + par |= FIELD_PREP(SYS_PAR_EL1_ATTR, + MEMATTR(WbRaWa, WbRaWa)); + par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH); + } else { + par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */ + par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH); + } + } else { + u64 mair, sctlr; + u8 sh; + + par = SYS_PAR_EL1_NSE; + + mair = (regime == TR_EL10 ? + vcpu_read_sys_reg(vcpu, MAIR_EL1) : + vcpu_read_sys_reg(vcpu, MAIR_EL2)); + + mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8; + mair &= 0xff; + + sctlr = (regime == TR_EL10 ? + vcpu_read_sys_reg(vcpu, SCTLR_EL1) : + vcpu_read_sys_reg(vcpu, SCTLR_EL2)); + + /* Force NC for memory if SCTLR_ELx.C is clear */ + if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair)) + mair = MEMATTR(NC, NC); + + par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); + par |= wr->pa & GENMASK_ULL(47, 12); + + sh = compute_sh(mair, wr->desc); + par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); + } + + return par; +} + +static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + bool perm_fail, ur, uw, ux, pr, pw, px; + struct s1_walk_result wr = {}; + struct s1_walk_info wi = {}; + int ret, idx; + + ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr); + if (ret) + goto compute_par; + + if (wr.level == S1_MMU_DISABLED) + goto compute_par; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + + ret = walk_s1(vcpu, &wi, &wr, vaddr); + + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + if (ret) + goto compute_par; + + /* FIXME: revisit when adding indirect permission support */ + /* AArch64.S1DirectBasePermissions() */ + if (wi.regime != TR_EL2) { + switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) { + case 0b00: + pr = pw = true; + ur = uw = false; + break; + case 0b01: + pr = pw = ur = uw = true; + break; + case 0b10: + pr = true; + pw = ur = uw = false; + break; + case 0b11: + pr = ur = true; + pw = uw = false; + break; + } + + switch (wr.APTable) { + case 0b00: + break; + case 0b01: + ur = uw = false; + break; + case 0b10: + pw = uw = false; + break; + case 0b11: + pw = ur = uw = false; + break; + } + + /* We don't use px for anything yet, but hey... */ + px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw); + ux = !((wr.desc & PTE_UXN) || wr.UXNTable); + + if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) { + bool pan; + + pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; + pan &= ur || uw; + pw &= !pan; + pr &= !pan; + } + } else { + ur = uw = ux = false; + + if (!(wr.desc & PTE_RDONLY)) { + pr = pw = true; + } else { + pr = true; + pw = false; + } + + if (wr.APTable & BIT(1)) + pw = false; + + /* XN maps to UXN */ + px = !((wr.desc & PTE_UXN) || wr.UXNTable); + } + + perm_fail = false; + + switch (op) { + case OP_AT_S1E1RP: + case OP_AT_S1E1R: + case OP_AT_S1E2R: + perm_fail = !pr; + break; + case OP_AT_S1E1WP: + case OP_AT_S1E1W: + case OP_AT_S1E2W: + perm_fail = !pw; + break; + case OP_AT_S1E0R: + perm_fail = !ur; + break; + case OP_AT_S1E0W: + perm_fail = !uw; + break; + default: + BUG(); + } + + if (perm_fail) + fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false); + +compute_par: + return compute_par_s1(vcpu, &wr, wi.regime); +} + /* * Return the PAR_EL1 value as the result of a valid translation. * @@ -352,10 +935,29 @@ skip_mmu_switch: return par; } +static bool par_check_s1_perm_fault(u64 par) +{ + u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par); + + return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM && + !(par & SYS_PAR_EL1_S)); +} + void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); + /* + * If PAR_EL1 reports that AT failed on a S1 permission fault, we + * know for sure that the PTW was able to walk the S1 tables and + * there's nothing else to do. + * + * If AT failed for any other reason, then we must walk the guest S1 + * to emulate the instruction. + */ + if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) + par = handle_at_slow(vcpu, op, vaddr); + vcpu_write_sys_reg(vcpu, par, PAR_EL1); } @@ -407,6 +1009,10 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) isb(); } + /* We failed the translation, let's replay it in slow motion */ + if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) + par = handle_at_slow(vcpu, op, vaddr); + vcpu_write_sys_reg(vcpu, par, PAR_EL1); } @@ -463,7 +1069,7 @@ void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) /* Check the access permission */ if (!out.esr && ((!write && !out.readable) || (write && !out.writable))) - out.esr = ESR_ELx_FSC_PERM | (out.level & 0x3); + out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3); par = compute_par_s12(vcpu, par, &out); vcpu_write_sys_reg(vcpu, par, PAR_EL1); From 2441418f3aadb3f9232431aeb10d89e48a934d94 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 15 Jul 2024 17:22:19 +0100 Subject: [PATCH 15/18] KVM: arm64: nv: Sanitise SCTLR_EL1.EPAN according to VM configuration Ensure that SCTLR_EL1.EPAN is RES0 when FEAT_PAN3 isn't supported. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/nested.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 9c8573493d80..133cc2f9530d 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -1178,6 +1178,14 @@ int kvm_init_nv_sysregs(struct kvm *kvm) if (!kvm_has_feat(kvm, ID_AA64PFR0_EL1, AMU, V1P1)) res0 |= ~(res0 | res1); set_sysreg_masks(kvm, HAFGRTR_EL2, res0, res1); + + /* SCTLR_EL1 */ + res0 = SCTLR_EL1_RES0; + res1 = SCTLR_EL1_RES1; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN3)) + res0 |= SCTLR_EL1_EPAN; + set_sysreg_masks(kvm, SCTLR_EL1, res0, res1); + out: mutex_unlock(&kvm->arch.config_lock); From d95bb9ef164edb33565cb73e3f0b0a581b3e4fbb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sat, 20 Jul 2024 22:06:00 +0100 Subject: [PATCH 16/18] KVM: arm64: nv: Make AT+PAN instructions aware of FEAT_PAN3 FEAT_PAN3 added a check for executable permissions to FEAT_PAN2. Add the required SCTLR_ELx.EPAN and descriptor checks to handle this correctly. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e037eb73738a..60f1ca3a897d 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -731,6 +731,21 @@ static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, return par; } +static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) +{ + u64 sctlr; + + if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3)) + return false; + + if (regime == TR_EL10) + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + else + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); + + return sctlr & SCTLR_EL1_EPAN; +} + static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { bool perm_fail, ur, uw, ux, pr, pw, px; @@ -797,7 +812,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) bool pan; pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; - pan &= ur || uw; + pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux); pw &= !pan; pr &= !pan; } From 8df747f4f3a5c680e3c0e68af3487b97343ca80a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 19 Jun 2024 08:14:45 +0100 Subject: [PATCH 17/18] KVM: arm64: nv: Plumb handling of AT S1* traps from EL2 Hooray, we're done. Plug the AT traps into the system instruction table, and let it rip. Signed-off-by: Marc Zyngier --- arch/arm64/kvm/sys_regs.c | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e7e5e0df119e..9f3cf82e5231 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2803,6 +2803,36 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(SP_EL2, NULL, reset_unknown, 0), }; +static bool handle_at_s1e01(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + + __kvm_at_s1e01(vcpu, op, p->regval); + + return true; +} + +static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + + __kvm_at_s1e2(vcpu, op, p->regval); + + return true; +} + +static bool handle_at_s12(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + + __kvm_at_s12(vcpu, op, p->regval); + + return true; +} + static bool kvm_supported_tlbi_s12_op(struct kvm_vcpu *vpcu, u32 instr) { struct kvm *kvm = vpcu->kvm; @@ -3065,6 +3095,14 @@ static struct sys_reg_desc sys_insn_descs[] = { { SYS_DESC(SYS_DC_ISW), access_dcsw }, { SYS_DESC(SYS_DC_IGSW), access_dcgsw }, { SYS_DESC(SYS_DC_IGDSW), access_dcgsw }, + + SYS_INSN(AT_S1E1R, handle_at_s1e01), + SYS_INSN(AT_S1E1W, handle_at_s1e01), + SYS_INSN(AT_S1E0R, handle_at_s1e01), + SYS_INSN(AT_S1E0W, handle_at_s1e01), + SYS_INSN(AT_S1E1RP, handle_at_s1e01), + SYS_INSN(AT_S1E1WP, handle_at_s1e01), + { SYS_DESC(SYS_DC_CSW), access_dcsw }, { SYS_DESC(SYS_DC_CGSW), access_dcgsw }, { SYS_DESC(SYS_DC_CGDSW), access_dcgsw }, @@ -3144,6 +3182,13 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(TLBI_VALE1NXS, handle_tlbi_el1), SYS_INSN(TLBI_VAALE1NXS, handle_tlbi_el1), + SYS_INSN(AT_S1E2R, handle_at_s1e2), + SYS_INSN(AT_S1E2W, handle_at_s1e2), + SYS_INSN(AT_S12E1R, handle_at_s12), + SYS_INSN(AT_S12E1W, handle_at_s12), + SYS_INSN(AT_S12E0R, handle_at_s12), + SYS_INSN(AT_S12E0W, handle_at_s12), + SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is), SYS_INSN(TLBI_IPAS2LE1IS, handle_ipas2e1is), From ff987ffc0c18c98f05ddc7696d56bb493b994450 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 23 Jun 2024 10:46:45 +0100 Subject: [PATCH 18/18] KVM: arm64: nv: Add support for FEAT_ATS1A Handling FEAT_ATS1A (which provides the AT S1E{1,2}A instructions) is pretty easy, as it is just the usual AT without the permission check. This basically amounts to plumbing the instructions in the various dispatch tables, and handling FEAT_ATS1A being disabled in the ID registers. Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kvm/at.c | 10 ++++++++++ arch/arm64/kvm/emulate-nested.c | 2 ++ arch/arm64/kvm/sys_regs.c | 11 +++++++++++ 4 files changed, 24 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index a2787091d5a0..bc161f160854 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -673,6 +673,7 @@ #define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5) #define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6) #define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7) +#define OP_AT_S1E2A sys_insn(AT_Op0, 4, AT_CRn, 9, 2) /* TLBI instructions */ #define TLBI_Op0 1 diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 60f1ca3a897d..39f0e87a340e 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -78,6 +78,7 @@ static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 o switch (op) { case OP_AT_S1E2R: case OP_AT_S1E2W: + case OP_AT_S1E2A: return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; break; default: @@ -852,6 +853,9 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) case OP_AT_S1E0W: perm_fail = !uw; break; + case OP_AT_S1E1A: + case OP_AT_S1E2A: + break; default: BUG(); } @@ -935,6 +939,9 @@ skip_mmu_switch: case OP_AT_S1E0W: fail = __kvm_at(OP_AT_S1E0W, vaddr); break; + case OP_AT_S1E1A: + fail = __kvm_at(OP_AT_S1E1A, vaddr); + break; default: WARN_ON_ONCE(1); fail = true; @@ -1010,6 +1017,9 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) case OP_AT_S1E2W: fail = __kvm_at(OP_AT_S1E1W, vaddr); break; + case OP_AT_S1E2A: + fail = __kvm_at(OP_AT_S1E1A, vaddr); + break; default: WARN_ON_ONCE(1); fail = true; diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c index 05166eccea0a..dbbae64c642c 100644 --- a/arch/arm64/kvm/emulate-nested.c +++ b/arch/arm64/kvm/emulate-nested.c @@ -786,6 +786,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(OP_AT_S12E1W, CGT_HCR_NV), SR_TRAP(OP_AT_S12E0R, CGT_HCR_NV), SR_TRAP(OP_AT_S12E0W, CGT_HCR_NV), + SR_TRAP(OP_AT_S1E2A, CGT_HCR_NV), SR_TRAP(OP_TLBI_IPAS2E1, CGT_HCR_NV), SR_TRAP(OP_TLBI_RIPAS2E1, CGT_HCR_NV), SR_TRAP(OP_TLBI_IPAS2LE1, CGT_HCR_NV), @@ -867,6 +868,7 @@ static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = { SR_TRAP(OP_AT_S1E0W, CGT_HCR_AT), SR_TRAP(OP_AT_S1E1RP, CGT_HCR_AT), SR_TRAP(OP_AT_S1E1WP, CGT_HCR_AT), + SR_TRAP(OP_AT_S1E1A, CGT_HCR_AT), SR_TRAP(SYS_ERXPFGF_EL1, CGT_HCR_nFIEN), SR_TRAP(SYS_ERXPFGCTL_EL1, CGT_HCR_nFIEN), SR_TRAP(SYS_ERXPFGCDN_EL1, CGT_HCR_nFIEN), diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 9f3cf82e5231..5ab0b2799393 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -2818,6 +2818,13 @@ static bool handle_at_s1e2(struct kvm_vcpu *vcpu, struct sys_reg_params *p, { u32 op = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2); + /* There is no FGT associated with AT S1E2A :-( */ + if (op == OP_AT_S1E2A && + !kvm_has_feat(vcpu->kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) { + kvm_inject_undefined(vcpu); + return false; + } + __kvm_at_s1e2(vcpu, op, p->regval); return true; @@ -3188,6 +3195,7 @@ static struct sys_reg_desc sys_insn_descs[] = { SYS_INSN(AT_S12E1W, handle_at_s12), SYS_INSN(AT_S12E0R, handle_at_s12), SYS_INSN(AT_S12E0W, handle_at_s12), + SYS_INSN(AT_S1E2A, handle_at_s1e2), SYS_INSN(TLBI_IPAS2E1IS, handle_ipas2e1is), SYS_INSN(TLBI_RIPAS2E1IS, handle_ripas2e1is), @@ -4645,6 +4653,9 @@ void kvm_calculate_traps(struct kvm_vcpu *vcpu) HFGITR_EL2_TLBIRVAAE1OS | HFGITR_EL2_TLBIRVAE1OS); + if (!kvm_has_feat(kvm, ID_AA64ISAR2_EL1, ATS1A, IMP)) + kvm->arch.fgu[HFGITR_GROUP] |= HFGITR_EL2_ATS1E1A; + if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, PAN, PAN2)) kvm->arch.fgu[HFGITR_GROUP] |= (HFGITR_EL2_ATS1E1RP | HFGITR_EL2_ATS1E1WP);