Merge branch 'for-next/mm' into for-next/core

* for-next/mm:
  arm64: map [_text, _stext) virtual address range non-executable+read-only
  arm64: Enable vmalloc-huge with ptdump
  arm64: mm: split linear mapping if BBML2 unsupported on secondary CPUs
  arm64: mm: support large block mapping when rodata=full
  arm64: Enable permission change on arm64 kernel block mappings
  arm64/Kconfig: Remove CONFIG_RODATA_FULL_DEFAULT_ENABLED
  arm64: mm: Rework the 'rodata=' options
  arm64: mm: Represent physical memory with phys_addr_t and resource_size_t
  arm64: mm: Make map_fdt() return mapped pointer
  arm64: mm: Cast start/end markers to char *, not u64
This commit is contained in:
Will Deacon 2025-09-24 16:34:34 +01:00
commit 77dfca70ba
20 changed files with 694 additions and 148 deletions

@ -6405,8 +6405,9 @@
rodata= [KNL,EARLY]
on Mark read-only kernel memory as read-only (default).
off Leave read-only kernel memory writable for debugging.
full Mark read-only kernel memory and aliases as read-only
[arm64]
noalias Mark read-only kernel memory as read-only but retain
writable aliases in the direct map for regions outside
of the kernel image. [arm64]
rockchip.usb_uart
[EARLY]

@ -1700,20 +1700,6 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY
When taking an exception from user-space, a sequence of branches
or a firmware call overwrites the branch history.
config RODATA_FULL_DEFAULT_ENABLED
bool "Apply r/o permissions of VM areas also to their linear aliases"
default y
help
Apply read-only attributes of VM areas to the linear alias of
the backing pages as well. This prevents code or read-only data
from being modified (inadvertently or intentionally) via another
mapping of the same memory page. This additional enhancement can
be turned off at runtime by passing rodata=[off|on] (and turned on
with rodata=full if this option is set to 'n').
This requires the linear region to be mapped down to pages,
which may adversely affect performance in some cases.
config ARM64_SW_TTBR0_PAN
bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
depends on !KCSAN

@ -871,6 +871,8 @@ static inline bool system_supports_pmuv3(void)
return cpus_have_final_cap(ARM64_HAS_PMUV3);
}
bool cpu_supports_bbml2_noabort(void);
static inline bool system_supports_bbml2_noabort(void)
{
return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT);

@ -78,6 +78,9 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
pgprot_t prot, bool page_mappings_only);
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
extern void mark_linear_text_alias_ro(void);
extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end);
extern void init_idmap_kpti_bbml2_flag(void);
extern void linear_map_maybe_split_to_ptes(void);
/*
* This check is triggered during the early boot before the cpufeature

@ -371,6 +371,11 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
}
static inline pmd_t pmd_mknoncont(pmd_t pmd)
{
return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
}
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{

@ -7,6 +7,8 @@
#include <linux/ptdump.h>
DECLARE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
#ifdef CONFIG_PTDUMP
#include <linux/mm_types.h>

@ -21,7 +21,7 @@ static inline bool arch_parse_debug_rodata(char *arg)
if (!arg)
return false;
if (!strcmp(arg, "full")) {
if (!strcmp(arg, "on")) {
rodata_enabled = rodata_full = true;
return true;
}
@ -31,7 +31,7 @@ static inline bool arch_parse_debug_rodata(char *arg)
return true;
}
if (!strcmp(arg, "on")) {
if (!strcmp(arg, "noalias")) {
rodata_enabled = true;
rodata_full = false;
return true;

@ -9,18 +9,13 @@
#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
/*
* SW table walks can't handle removal of intermediate entries.
*/
return pud_sect_supported() &&
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
return pud_sect_supported();
}
#define arch_vmap_pmd_supported arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
/* See arch_vmap_pud_supported() */
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
return true;
}
#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size

@ -86,6 +86,7 @@
#include <asm/kvm_host.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/mmu.h>
#include <asm/mte.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@ -2029,6 +2030,7 @@ static void __init kpti_install_ng_mappings(void)
if (arm64_use_ng_mappings)
return;
init_idmap_kpti_bbml2_flag();
stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
}
@ -2219,7 +2221,7 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
}
static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
bool cpu_supports_bbml2_noabort(void)
{
/*
* We want to allow usage of BBML2 in as wide a range of kernel contexts
@ -2255,6 +2257,11 @@ static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int sco
return true;
}
static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
{
return cpu_supports_bbml2_noabort();
}
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
@ -3930,6 +3937,7 @@ void __init setup_system_features(void)
{
setup_system_capabilities();
linear_map_maybe_split_to_ptes();
kpti_install_ng_mappings();
sve_setup();

@ -18,9 +18,9 @@
extern const u8 __eh_frame_start[], __eh_frame_end[];
extern void idmap_cpu_replace_ttbr1(void *pgdir);
extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);
static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
void *start, void *end, pgprot_t prot,
bool may_use_cont, int root_level)
{
@ -40,7 +40,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
{
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_t prot;
@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
twopass |= enable_scs;
prot = twopass ? data_prot : text_prot;
/*
* [_stext, _text) isn't executed after boot and contains some
* non-executable, unpredictable data, so map it non-executable.
*/
map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
false, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
!twopass, root_level);
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
@ -90,7 +96,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
true, root_level);
dsb(ishst);
idmap_cpu_replace_ttbr1(init_pg_dir);
idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);
if (twopass) {
if (IS_ENABLED(CONFIG_RELOCATABLE))
@ -129,10 +135,10 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
/* Copy the root page table to its final location */
memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
dsb(ishst);
idmap_cpu_replace_ttbr1(swapper_pg_dir);
idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
}
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
{
u64 sctlr = read_sysreg(sctlr_el1);
u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
@ -172,30 +178,30 @@ static void __init remap_idmap_for_lpa2(void)
*/
create_init_idmap(init_pg_dir, mask);
dsb(ishst);
set_ttbr0_for_lpa2((u64)init_pg_dir);
set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);
/*
* Recreate the initial ID map with the same granularity as before.
* Don't bother with the FDT, we no longer need it after this.
*/
memset(init_idmap_pg_dir, 0,
(u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
(char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);
create_init_idmap(init_idmap_pg_dir, mask);
dsb(ishst);
/* switch back to the updated initial ID map */
set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);
/* wipe the temporary ID map from memory */
memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
}
static void __init map_fdt(u64 fdt)
static void *__init map_fdt(phys_addr_t fdt)
{
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
u64 efdt = fdt + MAX_FDT_SIZE;
u64 ptep = (u64)ptes;
phys_addr_t efdt = fdt + MAX_FDT_SIZE;
phys_addr_t ptep = (phys_addr_t)ptes; /* We're idmapped when called */
/*
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
@ -205,6 +211,8 @@ static void __init map_fdt(u64 fdt)
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
(pte_t *)init_idmap_pg_dir, false, 0);
dsb(ishst);
return (void *)fdt;
}
/*
@ -230,7 +238,7 @@ static bool __init ng_mappings_allowed(void)
return true;
}
asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
{
static char const chosen_str[] __initconst = "/chosen";
u64 va_base, pa_base = (u64)&_text;
@ -238,15 +246,14 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
int va_bits = VA_BITS;
int chosen;
map_fdt((u64)fdt);
void *fdt_mapped = map_fdt(fdt);
/* Clear BSS and the initial page tables */
memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);
/* Parse the command line for CPU feature overrides */
chosen = fdt_path_offset(fdt, chosen_str);
init_feature_override(boot_status, fdt, chosen);
chosen = fdt_path_offset(fdt_mapped, chosen_str);
init_feature_override(boot_status, fdt_mapped, chosen);
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
va_bits = VA_BITS_MIN;
@ -266,7 +273,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
* fill in the high bits from the seed.
*/
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
u64 kaslr_seed = kaslr_early_init(fdt, chosen);
u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);
if (kaslr_seed && kaslr_requires_kpti())
arm64_use_ng_mappings = ng_mappings_allowed();

@ -26,8 +26,9 @@
* @va_offset: Offset between a physical page and its current mapping
* in the VA space
*/
void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
u64 va_offset)
{
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
@ -87,19 +88,22 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
}
}
asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
{
u64 ptep = (u64)pg_dir + PAGE_SIZE;
phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */
pgprot_t text_prot = PAGE_KERNEL_ROX;
pgprot_t data_prot = PAGE_KERNEL;
pgprot_val(text_prot) &= ~clrmask;
pgprot_val(data_prot) &= ~clrmask;
map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
/* MMU is off; pointer casts to phys_addr_t are safe */
map_range(&ptep, (u64)_stext, (u64)__initdata_begin,
(phys_addr_t)_stext, text_prot, IDMAP_ROOT_LEVEL,
(pte_t *)pg_dir, false, 0);
map_range(&ptep, (u64)__initdata_begin, (u64)_end,
(phys_addr_t)__initdata_begin, data_prot, IDMAP_ROOT_LEVEL,
(pte_t *)pg_dir, false, 0);
return ptep;
}

@ -29,9 +29,10 @@ u64 kaslr_early_init(void *fdt, int chosen);
void relocate_kernel(u64 offset);
int scs_patch(const u8 eh_frame[], int size);
void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
u64 va_offset);
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt);
asmlinkage u64 create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);

@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
unsigned long i = 0;
size_t res_size;
kernel_code.start = __pa_symbol(_stext);
kernel_code.start = __pa_symbol(_text);
kernel_code.end = __pa_symbol(__init_begin - 1);
kernel_data.start = __pa_symbol(_sdata);
kernel_data.end = __pa_symbol(_end - 1);
@ -280,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
void __init __no_sanitize_address setup_arch(char **cmdline_p)
{
setup_initial_init_mm(_stext, _etext, _edata, _end);
setup_initial_init_mm(_text, _etext, _edata, _end);
*cmdline_p = boot_command_line;

@ -243,7 +243,7 @@ void __init arm64_memblock_init(void)
*/
if (memory_limit != PHYS_ADDR_MAX) {
memblock_mem_limit_remove_map(memory_limit);
memblock_add(__pa_symbol(_text), (u64)(_end - _text));
memblock_add(__pa_symbol(_text), (resource_size_t)(_end - _text));
}
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
@ -252,8 +252,8 @@ void __init arm64_memblock_init(void)
* initrd to become inaccessible via the linear mapping.
* Otherwise, this is a no-op
*/
u64 base = phys_initrd_start & PAGE_MASK;
u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
phys_addr_t base = phys_initrd_start & PAGE_MASK;
resource_size_t size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
/*
* We can only add back the initrd memory if we don't end up
@ -279,7 +279,7 @@ void __init arm64_memblock_init(void)
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
memblock_reserve(__pa_symbol(_stext), _end - _stext);
memblock_reserve(__pa_symbol(_text), _end - _text);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(phys_initrd_start);

@ -27,6 +27,8 @@
#include <linux/kfence.h>
#include <linux/pkeys.h>
#include <linux/mm_inline.h>
#include <linux/pagewalk.h>
#include <linux/stop_machine.h>
#include <asm/barrier.h>
#include <asm/cputype.h>
@ -47,6 +49,8 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@ -474,14 +478,18 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
int flags);
#endif
static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
#define INVALID_PHYS_ADDR (-1ULL)
static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
enum pgtable_type pgtable_type)
{
/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
struct ptdesc *ptdesc = pagetable_alloc(gfp & ~__GFP_ZERO, 0);
phys_addr_t pa;
BUG_ON(!ptdesc);
if (!ptdesc)
return INVALID_PHYS_ADDR;
pa = page_to_phys(ptdesc_page(ptdesc));
switch (pgtable_type) {
@ -502,16 +510,392 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
return pa;
}
/*
 * Allocate a kernel page-table page for init_mm. Unlike the
 * pgd_pgtable_alloc_*() wrappers, this variant is allowed to fail: it
 * returns INVALID_PHYS_ADDR instead of BUG()ing when the allocation fails,
 * letting callers (the mapping-split paths) propagate -ENOMEM.
 */
static phys_addr_t
try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp)
{
	return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type);
}
static phys_addr_t __maybe_unused
pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
{
return __pgd_pgtable_alloc(&init_mm, pgtable_type);
phys_addr_t pa;
pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type);
BUG_ON(pa == INVALID_PHYS_ADDR);
return pa;
}
static phys_addr_t
pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
{
return __pgd_pgtable_alloc(NULL, pgtable_type);
phys_addr_t pa;
pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
BUG_ON(pa == INVALID_PHYS_ADDR);
return pa;
}
/* Clear the contiguous hint on every PTE of the contpte block covering @ptep. */
static void split_contpte(pte_t *ptep)
{
	pte_t *p = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
	unsigned int idx = 0;

	while (idx++ < CONT_PTES) {
		__set_pte(p, pte_mknoncont(__ptep_get(p)));
		p++;
	}
}
/*
 * Split a PMD leaf mapping into a table of PTEs covering the same physical
 * range with the same permissions.
 *
 * @pmdp:    PMD entry to replace with a table entry
 * @pmd:     snapshot of the leaf value being split
 * @gfp:     allocation flags for the new PTE table
 * @to_cont: if true, tag the new PTEs as a contpte run (intermediate split
 *           step); if false, produce plain non-contiguous PTEs
 *
 * Returns 0 on success or -ENOMEM if the PTE table cannot be allocated.
 * Callers ensure it is safe to replace a live leaf with a table entry
 * (BBML2_NOABORT, or exclusive execution under stop_machine()).
 */
static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
{
	pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
	unsigned long pfn = pmd_pfn(pmd);
	pgprot_t prot = pmd_pgprot(pmd);
	phys_addr_t pte_phys;
	pte_t *ptep;
	int i;

	pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp);
	if (pte_phys == INVALID_PHYS_ADDR)
		return -ENOMEM;
	ptep = (pte_t *)phys_to_virt(pte_phys);

	/* Carry the execute-never restriction down into the table descriptor. */
	if (pgprot_val(prot) & PMD_SECT_PXN)
		tableprot |= PMD_TABLE_PXN;

	/* Convert from block descriptor format to page descriptor format. */
	prot = __pgprot((pgprot_val(prot) & ~PTE_TYPE_MASK) | PTE_TYPE_PAGE);
	prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
	if (to_cont)
		prot = __pgprot(pgprot_val(prot) | PTE_CONT);

	for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
		__set_pte(ptep, pfn_pte(pfn, prot));

	/*
	 * Ensure the pte entries are visible to the table walker by the time
	 * the pmd entry that points to the ptes is visible.
	 */
	dsb(ishst);
	__pmd_populate(pmdp, pte_phys, tableprot);

	return 0;
}
/* Clear the contiguous hint on every PMD of the contpmd block covering @pmdp. */
static void split_contpmd(pmd_t *pmdp)
{
	pmd_t *p = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
	unsigned int idx = 0;

	while (idx++ < CONT_PMDS) {
		set_pmd(p, pmd_mknoncont(pmdp_get(p)));
		p++;
	}
}
/*
 * Split a PUD leaf mapping into a table of PMD leaves covering the same
 * physical range with the same permissions.
 *
 * @pudp:    PUD entry to replace with a table entry
 * @pud:     snapshot of the leaf value being split
 * @gfp:     allocation flags for the new PMD table
 * @to_cont: if true, tag the new PMDs as a contpmd run; if false, produce
 *           plain non-contiguous PMD leaves
 *
 * Returns 0 on success or -ENOMEM if the PMD table cannot be allocated.
 */
static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
{
	pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
	unsigned int step = PMD_SIZE >> PAGE_SHIFT;	/* pfns per PMD leaf */
	unsigned long pfn = pud_pfn(pud);
	pgprot_t prot = pud_pgprot(pud);
	phys_addr_t pmd_phys;
	pmd_t *pmdp;
	int i;

	pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp);
	if (pmd_phys == INVALID_PHYS_ADDR)
		return -ENOMEM;
	pmdp = (pmd_t *)phys_to_virt(pmd_phys);

	/* Carry the execute-never restriction down into the table descriptor. */
	if (pgprot_val(prot) & PMD_SECT_PXN)
		tableprot |= PUD_TABLE_PXN;

	/* Each new entry stays a section (block) mapping, one level down. */
	prot = __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
	prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
	if (to_cont)
		prot = __pgprot(pgprot_val(prot) | PTE_CONT);

	for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
		set_pmd(pmdp, pfn_pmd(pfn, prot));

	/*
	 * Ensure the pmd entries are visible to the table walker by the time
	 * the pud entry that points to the pmds is visible.
	 */
	dsb(ishst);
	__pud_populate(pudp, pmd_phys, tableprot);

	return 0;
}
/*
 * Walk the kernel page table down to @addr and split any block or contiguous
 * leaf mapping it falls inside, so that @addr ends up on a leaf-mapping
 * boundary. The split proceeds top-down — pud -> contpmd -> pmd -> contpte ->
 * pte — stopping as soon as @addr is naturally aligned to the current level's
 * leaf size (in which case it already sits on a boundary).
 *
 * Caller must hold pgtable_split_lock. Returns 0 on success or -ENOMEM if a
 * new page-table level could not be allocated.
 */
static int split_kernel_leaf_mapping_locked(unsigned long addr)
{
	pgd_t *pgdp, pgd;
	p4d_t *p4dp, p4d;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;
	pte_t *ptep, pte;
	int ret = 0;

	/*
	 * PGD: If addr is PGD aligned then addr already describes a leaf
	 * boundary. If not present then there is nothing to split.
	 */
	if (ALIGN_DOWN(addr, PGDIR_SIZE) == addr)
		goto out;
	pgdp = pgd_offset_k(addr);
	pgd = pgdp_get(pgdp);
	if (!pgd_present(pgd))
		goto out;

	/*
	 * P4D: If addr is P4D aligned then addr already describes a leaf
	 * boundary. If not present then there is nothing to split.
	 */
	if (ALIGN_DOWN(addr, P4D_SIZE) == addr)
		goto out;
	p4dp = p4d_offset(pgdp, addr);
	p4d = p4dp_get(p4dp);
	if (!p4d_present(p4d))
		goto out;

	/*
	 * PUD: If addr is PUD aligned then addr already describes a leaf
	 * boundary. If not present then there is nothing to split. Otherwise,
	 * if we have a pud leaf, split to contpmd.
	 */
	if (ALIGN_DOWN(addr, PUD_SIZE) == addr)
		goto out;
	pudp = pud_offset(p4dp, addr);
	pud = pudp_get(pudp);
	if (!pud_present(pud))
		goto out;
	if (pud_leaf(pud)) {
		ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
		if (ret)
			goto out;
	}

	/*
	 * CONTPMD: If addr is CONTPMD aligned then addr already describes a
	 * leaf boundary. If not present then there is nothing to split.
	 * Otherwise, if we have a contpmd leaf, split to pmd.
	 */
	if (ALIGN_DOWN(addr, CONT_PMD_SIZE) == addr)
		goto out;
	pmdp = pmd_offset(pudp, addr);
	pmd = pmdp_get(pmdp);
	if (!pmd_present(pmd))
		goto out;
	if (pmd_leaf(pmd)) {
		if (pmd_cont(pmd))
			split_contpmd(pmdp);
		/*
		 * PMD: If addr is PMD aligned then addr already describes a
		 * leaf boundary. Otherwise, split to contpte.
		 */
		if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
			goto out;
		ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
		if (ret)
			goto out;
	}

	/*
	 * CONTPTE: If addr is CONTPTE aligned then addr already describes a
	 * leaf boundary. If not present then there is nothing to split.
	 * Otherwise, if we have a contpte leaf, split to pte.
	 */
	if (ALIGN_DOWN(addr, CONT_PTE_SIZE) == addr)
		goto out;
	ptep = pte_offset_kernel(pmdp, addr);
	pte = __ptep_get(ptep);
	if (!pte_present(pte))
		goto out;
	if (pte_cont(pte))
		split_contpte(ptep);

out:
	return ret;
}
/* Serialises concurrent splits of live kernel leaf mappings. */
static DEFINE_MUTEX(pgtable_split_lock);

/*
 * Ensure that no block or contiguous leaf mapping straddles @start or @end,
 * so that the permissions of [start, end) can later be changed without
 * affecting memory outside that range. Both addresses must be page-aligned.
 *
 * Returns 0 on success, -EINVAL on misaligned input, or -ENOMEM if a page
 * table allocation failed while splitting.
 */
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
{
	int ret;

	/*
	 * !BBML2_NOABORT systems should not be trying to change permissions on
	 * anything that is not pte-mapped in the first place. Just return early
	 * and let the permission change code raise a warning if not already
	 * pte-mapped.
	 */
	if (!system_supports_bbml2_noabort())
		return 0;

	/*
	 * Ensure start and end are at least page-aligned since this is the
	 * finest granularity we can split to.
	 */
	if (start != PAGE_ALIGN(start) || end != PAGE_ALIGN(end))
		return -EINVAL;

	mutex_lock(&pgtable_split_lock);
	arch_enter_lazy_mmu_mode();

	/*
	 * split_kernel_leaf_mapping_locked() may sleep; this is not a problem
	 * for arm64 because arm64's lazy MMU implementation allows sleeping.
	 *
	 * Optimize for the common case of splitting out a single page from a
	 * larger mapping. Here we can just split on the "least aligned" of
	 * start and end and this will guarantee that there must also be a split
	 * on the more aligned address since both addresses must be in the
	 * same contpte block and it must have been split to ptes.
	 */
	if (end - start == PAGE_SIZE) {
		start = __ffs(start) < __ffs(end) ? start : end;
		ret = split_kernel_leaf_mapping_locked(start);
	} else {
		ret = split_kernel_leaf_mapping_locked(start);
		if (!ret)
			ret = split_kernel_leaf_mapping_locked(end);
	}

	arch_leave_lazy_mmu_mode();
	mutex_unlock(&pgtable_split_lock);
	return ret;
}
/* pagewalk callback: demote a PUD leaf straight to non-contiguous PMDs. */
static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
					  unsigned long next,
					  struct mm_walk *walk)
{
	pud_t pud = pudp_get(pudp);

	if (!pud_leaf(pud))
		return 0;

	return split_pud(pudp, pud, GFP_ATOMIC, false);
}
/* pagewalk callback: demote a PMD leaf (cont or not) straight to PTEs. */
static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
					  unsigned long next,
					  struct mm_walk *walk)
{
	pmd_t pmd = pmdp_get(pmdp);

	if (!pmd_leaf(pmd))
		return 0;

	if (pmd_cont(pmd))
		split_contpmd(pmdp);

	/*
	 * We split the pmd directly to (non-cont) ptes, so there is no need
	 * for the walker to visit each resulting pte looking for contptes.
	 */
	walk->action = ACTION_CONTINUE;

	return split_pmd(pmdp, pmd, GFP_ATOMIC, false);
}
/* pagewalk callback: demote a contpte entry to a plain PTE. */
static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
					  unsigned long next,
					  struct mm_walk *walk)
{
	if (pte_cont(__ptep_get(ptep)))
		split_contpte(ptep);

	return 0;
}
/* Walker callbacks used to demote every linear-map leaf mapping to PTEs. */
static const struct mm_walk_ops split_to_ptes_ops __initconst = {
	.pud_entry	= split_to_ptes_pud_entry,
	.pmd_entry	= split_to_ptes_pmd_entry,
	.pte_entry	= split_to_ptes_pte_entry,
};
/*
 * Set at boot when the linear map was created with block/cont mappings whose
 * later modification depends on BBML2_NOABORT support.
 */
static bool linear_map_requires_bbml2 __initdata;

/*
 * Rendezvous flag shared (via a dedicated ID-map PTE) with the low-level
 * helpers used by the KPTI G-to-nG rewrite and the linear-map repaint.
 */
u32 idmap_kpti_bbml2_flag;

/* Arm the rendezvous flag before a stop_machine() call that relies on it. */
void __init init_idmap_kpti_bbml2_flag(void)
{
	WRITE_ONCE(idmap_kpti_bbml2_flag, 1);

	/* Must be visible to other CPUs before stop_machine() is called. */
	smp_mb();
}
/*
 * stop_machine() callback that repaints the whole linear map — except the
 * kernel image's own linear alias — with PTE-granularity mappings.
 *
 * Only the boot CPU performs the walk (it is known to support BBML2); every
 * secondary CPU switches to the ID map and spins in
 * wait_linear_map_split_to_ptes until the boot CPU clears
 * idmap_kpti_bbml2_flag. Always returns 0; failure to split is fatal.
 */
static int __init linear_map_split_to_ptes(void *__unused)
{
	/*
	 * Repainting the linear map must be done by CPU0 (the boot CPU) because
	 * that's the only CPU that we know supports BBML2. The other CPUs will
	 * be held in a waiting area with the idmap active.
	 */
	if (!smp_processor_id()) {
		unsigned long lstart = _PAGE_OFFSET(vabits_actual);
		unsigned long lend = PAGE_END;
		unsigned long kstart = (unsigned long)lm_alias(_stext);
		unsigned long kend = (unsigned long)lm_alias(__init_begin);
		int ret;

		/*
		 * Wait for all secondary CPUs to be put into the waiting area.
		 */
		smp_cond_load_acquire(&idmap_kpti_bbml2_flag, VAL == num_online_cpus());

		/*
		 * Walk all of the linear map [lstart, lend), except the kernel
		 * linear map alias [kstart, kend), and split all mappings to
		 * PTE. The kernel alias remains static throughout runtime so
		 * can continue to be safely mapped with large mappings.
		 */
		ret = walk_kernel_page_table_range_lockless(lstart, kstart,
						&split_to_ptes_ops, NULL, NULL);
		if (!ret)
			ret = walk_kernel_page_table_range_lockless(kend, lend,
						&split_to_ptes_ops, NULL, NULL);
		if (ret)
			panic("Failed to split linear map\n");
		flush_tlb_kernel_range(lstart, lend);

		/*
		 * Relies on dsb in flush_tlb_kernel_range() to avoid reordering
		 * before any page table split operations.
		 */
		WRITE_ONCE(idmap_kpti_bbml2_flag, 0);
	} else {
		typedef void (wait_split_fn)(void);
		extern wait_split_fn wait_linear_map_split_to_ptes;
		wait_split_fn *wait_fn;

		/* The waiting loop runs from the ID map, so use its PA. */
		wait_fn = (void *)__pa_symbol(wait_linear_map_split_to_ptes);

		/*
		 * At least one secondary CPU doesn't support BBML2 so cannot
		 * tolerate the size of the live mappings changing. So have the
		 * secondary CPUs wait for the boot CPU to make the changes
		 * with the idmap active and init_mm inactive.
		 */
		cpu_install_idmap();
		wait_fn();
		cpu_uninstall_idmap();
	}

	return 0;
}
/*
 * If the linear map was built with large mappings but some CPU in the system
 * turned out not to support BBML2_NOABORT, repaint it to PTEs now.
 */
void __init linear_map_maybe_split_to_ptes(void)
{
	if (!linear_map_requires_bbml2 || system_supports_bbml2_noabort())
		return;

	init_idmap_kpti_bbml2_flag();
	stop_machine(linear_map_split_to_ptes, NULL, cpu_online_mask);
}
/*
@ -574,8 +958,8 @@ void __init mark_linear_text_alias_ro(void)
/*
* Remove the write permissions from the linear alias of .text/.rodata
*/
update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
(unsigned long)__init_begin - (unsigned long)_stext,
update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
(unsigned long)__init_begin - (unsigned long)_text,
PAGE_KERNEL_RO);
}
@ -633,10 +1017,20 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) {
#endif /* CONFIG_KFENCE */
static inline bool force_pte_mapping(void)
{
bool bbml2 = system_capabilities_finalized() ?
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() ||
is_realm_world())) ||
debug_pagealloc_enabled();
}
static void __init map_mem(pgd_t *pgdp)
{
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
phys_addr_t kernel_start = __pa_symbol(_stext);
phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin);
phys_addr_t start, end;
phys_addr_t early_kfence_pool;
@ -658,7 +1052,9 @@ static void __init map_mem(pgd_t *pgdp)
early_kfence_pool = arm64_kfence_alloc_pool();
if (can_set_direct_map())
linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map();
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@ -683,7 +1079,7 @@ static void __init map_mem(pgd_t *pgdp)
}
/*
* Map the linear alias of the [_stext, __init_begin) interval
* Map the linear alias of the [_text, __init_begin) interval
* as non-executable now, and remove the write permission in
* mark_linear_text_alias_ro() below (which will be called after
* alternative patching has completed). This makes the contents
@ -710,6 +1106,10 @@ void mark_rodata_ro(void)
WRITE_ONCE(rodata_is_rw, false);
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
section_size, PAGE_KERNEL_RO);
/* mark the range between _text and _stext as read only. */
update_mapping_prot(__pa_symbol(_text), (unsigned long)_text,
(unsigned long)_stext - (unsigned long)_text,
PAGE_KERNEL_RO);
}
static void __init declare_vma(struct vm_struct *vma,
@ -780,38 +1180,41 @@ static void __init declare_kernel_vmas(void)
{
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
declare_vma(&vmlinux_seg[0], _text, _etext, VM_NO_GUARD);
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
declare_vma(&vmlinux_seg[4], _data, _end, 0);
}
void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
void __pi_map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
u64 va_offset);
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
kpti_bbml2_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
static void __init create_idmap(void)
{
u64 start = __pa_symbol(__idmap_text_start);
u64 end = __pa_symbol(__idmap_text_end);
u64 ptep = __pa_symbol(idmap_ptes);
phys_addr_t start = __pa_symbol(__idmap_text_start);
phys_addr_t end = __pa_symbol(__idmap_text_end);
phys_addr_t ptep = __pa_symbol(idmap_ptes);
__pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
extern u32 __idmap_kpti_flag;
u64 pa = __pa_symbol(&__idmap_kpti_flag);
if (linear_map_requires_bbml2 ||
(IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings)) {
phys_addr_t pa = __pa_symbol(&idmap_kpti_bbml2_flag);
/*
* The KPTI G-to-nG conversion code needs a read-write mapping
* of its synchronization flag in the ID map.
* of its synchronization flag in the ID map. This is also used
* when splitting the linear map to ptes if a secondary CPU
* doesn't support bbml2.
*/
ptep = __pa_symbol(kpti_ptes);
ptep = __pa_symbol(kpti_bbml2_ptes);
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
__phys_to_virt(ptep) - ptep);
@ -1261,7 +1664,8 @@ int pmd_clear_huge(pmd_t *pmdp)
return 1;
}
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
static int __pmd_free_pte_page(pmd_t *pmdp, unsigned long addr,
bool acquire_mmap_lock)
{
pte_t *table;
pmd_t pmd;
@ -1273,13 +1677,25 @@ int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
return 1;
}
/* See comment in pud_free_pmd_page for static key logic */
table = pte_offset_kernel(pmdp, addr);
pmd_clear(pmdp);
__flush_tlb_kernel_pgtable(addr);
if (static_branch_unlikely(&arm64_ptdump_lock_key) && acquire_mmap_lock) {
mmap_read_lock(&init_mm);
mmap_read_unlock(&init_mm);
}
pte_free_kernel(NULL, table);
return 1;
}
/*
 * Clear @pmdp and free the PTE table it pointed to, synchronising with any
 * concurrent ptdump walk via init_mm.mmap_lock (see __pmd_free_pte_page()).
 */
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
{
	/* If ptdump is walking the pagetables, acquire init_mm.mmap_lock */
	return __pmd_free_pte_page(pmdp, addr, /* acquire_mmap_lock = */ true);
}
int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
{
pmd_t *table;
@ -1295,16 +1711,36 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
}
table = pmd_offset(pudp, addr);
/*
* Our objective is to prevent ptdump from reading a PMD table which has
* been freed. In this race, if pud_free_pmd_page observes the key on
* (which got flipped by ptdump) then the mmap lock sequence here will,
* as a result of the mmap write lock/unlock sequence in ptdump, give
* us the correct synchronization. If not, this means that ptdump has
* yet not started walking the pagetables - the sequence of barriers
* issued by __flush_tlb_kernel_pgtable() guarantees that ptdump will
* observe an empty PUD.
*/
pud_clear(pudp);
__flush_tlb_kernel_pgtable(addr);
if (static_branch_unlikely(&arm64_ptdump_lock_key)) {
mmap_read_lock(&init_mm);
mmap_read_unlock(&init_mm);
}
pmdp = table;
next = addr;
end = addr + PUD_SIZE;
do {
if (pmd_present(pmdp_get(pmdp)))
pmd_free_pte_page(pmdp, next);
/*
* PMD has been isolated, so ptdump won't see it. No
* need to acquire init_mm.mmap_lock.
*/
__pmd_free_pte_page(pmdp, next, /* acquire_mmap_lock = */ false);
} while (pmdp++, next += PMD_SIZE, next != end);
pud_clear(pudp);
__flush_tlb_kernel_pgtable(addr);
pmd_free(NULL, table);
return 1;
}
@ -1324,8 +1760,8 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
struct range arch_get_mappable_range(void)
{
struct range mhp_range;
u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
u64 end_linear_pa = __pa(PAGE_END - 1);
phys_addr_t start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
phys_addr_t end_linear_pa = __pa(PAGE_END - 1);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
/*
@ -1360,7 +1796,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
VM_BUG_ON(!mhp_range_allowed(start, size, true));
if (can_set_direct_map())
if (force_pte_mapping())
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),

@ -8,6 +8,7 @@
#include <linux/mem_encrypt.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/pagewalk.h>
#include <asm/cacheflush.h>
#include <asm/pgtable-prot.h>
@ -20,7 +21,66 @@ struct page_change_data {
pgprot_t clear_mask;
};
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk)
{
	const struct page_change_data *cdata = walk->private;

	/* Drop the requested bits first, then apply the new ones. */
	return (val & ~pgprot_val(cdata->clear_mask)) |
	       pgprot_val(cdata->set_mask);
}
static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pud_t pudval = pudp_get(pud);

	/* Only leaf (section) entries carry permission bits to update. */
	if (!pud_sect(pudval))
		return 0;

	/* A block mapping must line up exactly with the walk step. */
	if (WARN_ON_ONCE((next - addr) != PUD_SIZE))
		return -EINVAL;

	set_pud(pud, __pud(set_pageattr_masks(pud_val(pudval), walk)));
	walk->action = ACTION_CONTINUE;
	return 0;
}
static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	pmd_t pmdval = pmdp_get(pmd);

	/* Only leaf (section) entries carry permission bits to update. */
	if (!pmd_sect(pmdval))
		return 0;

	/* A block mapping must line up exactly with the walk step. */
	if (WARN_ON_ONCE((next - addr) != PMD_SIZE))
		return -EINVAL;

	set_pmd(pmd, __pmd(set_pageattr_masks(pmd_val(pmdval), walk)));
	walk->action = ACTION_CONTINUE;
	return 0;
}
static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
			      unsigned long next, struct mm_walk *walk)
{
	/* PTEs are always leaf entries: apply the masks unconditionally. */
	ptdesc_t newval = set_pageattr_masks(pte_val(__ptep_get(pte)), walk);

	__set_pte(pte, __pte(newval));
	return 0;
}
/*
 * Walker callbacks used to rewrite permission bits on leaf entries at
 * each level of the kernel page tables.
 */
static const struct mm_walk_ops pageattr_ops = {
	.pud_entry	= pageattr_pud_entry,
	.pmd_entry	= pageattr_pmd_entry,
	.pte_entry	= pageattr_pte_entry,
};
bool rodata_full __ro_after_init = true;
bool can_set_direct_map(void)
{
@ -37,23 +97,8 @@ bool can_set_direct_map(void)
arm64_kfence_can_set_direct_map() || is_realm_world();
}
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
struct page_change_data *cdata = data;
pte_t pte = __ptep_get(ptep);
pte = clear_pte_bit(pte, cdata->clear_mask);
pte = set_pte_bit(pte, cdata->set_mask);
__set_pte(ptep, pte);
return 0;
}
/*
* This function assumes that the range is mapped with PAGE_SIZE pages.
*/
static int __change_memory_common(unsigned long start, unsigned long size,
pgprot_t set_mask, pgprot_t clear_mask)
static int update_range_prot(unsigned long start, unsigned long size,
pgprot_t set_mask, pgprot_t clear_mask)
{
struct page_change_data data;
int ret;
@ -61,8 +106,30 @@ static int __change_memory_common(unsigned long start, unsigned long size,
data.set_mask = set_mask;
data.clear_mask = clear_mask;
ret = apply_to_page_range(&init_mm, start, size, change_page_range,
&data);
ret = split_kernel_leaf_mapping(start, start + size);
if (WARN_ON_ONCE(ret))
return ret;
arch_enter_lazy_mmu_mode();
/*
* The caller must ensure that the range we are operating on does not
* partially overlap a block mapping, or a cont mapping. Any such case
* must be eliminated by splitting the mapping.
*/
ret = walk_kernel_page_table_range_lockless(start, start + size,
&pageattr_ops, NULL, &data);
arch_leave_lazy_mmu_mode();
return ret;
}
static int __change_memory_common(unsigned long start, unsigned long size,
pgprot_t set_mask, pgprot_t clear_mask)
{
int ret;
ret = update_range_prot(start, size, set_mask, clear_mask);
/*
* If the memory is being made valid without changing any other bits
@ -174,32 +241,26 @@ int set_memory_valid(unsigned long addr, int numpages, int enable)
int set_direct_map_invalid_noflush(struct page *page)
{
struct page_change_data data = {
.set_mask = __pgprot(0),
.clear_mask = __pgprot(PTE_VALID),
};
pgprot_t clear_mask = __pgprot(PTE_VALID);
pgprot_t set_mask = __pgprot(0);
if (!can_set_direct_map())
return 0;
return apply_to_page_range(&init_mm,
(unsigned long)page_address(page),
PAGE_SIZE, change_page_range, &data);
return update_range_prot((unsigned long)page_address(page),
PAGE_SIZE, set_mask, clear_mask);
}
int set_direct_map_default_noflush(struct page *page)
{
struct page_change_data data = {
.set_mask = __pgprot(PTE_VALID | PTE_WRITE),
.clear_mask = __pgprot(PTE_RDONLY),
};
pgprot_t set_mask = __pgprot(PTE_VALID | PTE_WRITE);
pgprot_t clear_mask = __pgprot(PTE_RDONLY);
if (!can_set_direct_map())
return 0;
return apply_to_page_range(&init_mm,
(unsigned long)page_address(page),
PAGE_SIZE, change_page_range, &data);
return update_range_prot((unsigned long)page_address(page),
PAGE_SIZE, set_mask, clear_mask);
}
static int __set_memory_enc_dec(unsigned long addr,

@ -245,10 +245,6 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
*
* Called exactly once from stop_machine context by each CPU found during boot.
*/
.pushsection ".data", "aw", %progbits
SYM_DATA(__idmap_kpti_flag, .long 1)
.popsection
SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
cpu .req w0
temp_pte .req x0
@ -273,7 +269,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
mov x5, x3 // preserve temp_pte arg
mrs swapper_ttb, ttbr1_el1
adr_l flag_ptr, __idmap_kpti_flag
adr_l flag_ptr, idmap_kpti_bbml2_flag
cbnz cpu, __idmap_kpti_secondary
@ -416,7 +412,25 @@ alternative_else_nop_endif
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
__idmap_cpu_set_reserved_ttbr1 x16, x17
b scondary_cpu_wait
.unreq swapper_ttb
.unreq flag_ptr
SYM_FUNC_END(idmap_kpti_install_ng_mappings)
.popsection
#endif
.pushsection ".idmap.text", "a"
SYM_TYPED_FUNC_START(wait_linear_map_split_to_ptes)
/* Must be same registers as in idmap_kpti_install_ng_mappings */
swapper_ttb .req x3
flag_ptr .req x4
mrs swapper_ttb, ttbr1_el1
adr_l flag_ptr, idmap_kpti_bbml2_flag
__idmap_cpu_set_reserved_ttbr1 x16, x17
scondary_cpu_wait:
	/* Increment the flag to let the boot CPU know we're ready */
1: ldxr w16, [flag_ptr]
add w16, w16, #1
@ -436,9 +450,8 @@ __idmap_kpti_secondary:
.unreq swapper_ttb
.unreq flag_ptr
SYM_FUNC_END(idmap_kpti_install_ng_mappings)
SYM_FUNC_END(wait_linear_map_split_to_ptes)
.popsection
#endif
/*
* __cpu_setup

@ -283,6 +283,13 @@ void note_page_flush(struct ptdump_state *pt_st)
note_page(pt_st, 0, -1, pte_val(pte_zero));
}
/*
 * Walk a pgd for dumping, with arm64_ptdump_lock_key raised for the
 * duration of the walk so that the page-table freeing paths (e.g.
 * pud_free_pmd_page()/pmd_free_pte_page()) take the init_mm mmap_lock
 * and do not free tables out from under us.
 */
static void arm64_ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
{
	static_branch_inc(&arm64_ptdump_lock_key);
	ptdump_walk_pgd(st, mm, NULL);
	static_branch_dec(&arm64_ptdump_lock_key);
}
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
{
unsigned long end = ~0UL;
@ -311,7 +318,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
}
};
ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
arm64_ptdump_walk_pgd(&st.ptdump, info->mm);
}
static void __init ptdump_initialize(void)
@ -353,7 +360,7 @@ bool ptdump_check_wx(void)
}
};
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
arm64_ptdump_walk_pgd(&st.ptdump, &init_mm);
if (st.wx_pages || st.uxn_pages) {
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",

@ -134,6 +134,9 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
int walk_kernel_page_table_range(unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
pgd_t *pgd, void *private);
int walk_kernel_page_table_range_lockless(unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
pgd_t *pgd, void *private);
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
unsigned long end, const struct mm_walk_ops *ops,
void *private);

@ -606,10 +606,32 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
int walk_kernel_page_table_range(unsigned long start, unsigned long end,
const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
{
struct mm_struct *mm = &init_mm;
/*
* Kernel intermediate page tables are usually not freed, so the mmap
* read lock is sufficient. But there are some exceptions.
* E.g. memory hot-remove. In which case, the mmap lock is insufficient
	 * to prevent the intermediate kernel page tables belonging to the
* specified address range from being freed. The caller should take
* other actions to prevent this race.
*/
mmap_assert_locked(&init_mm);
return walk_kernel_page_table_range_lockless(start, end, ops, pgd,
private);
}
/*
* Use this function to walk the kernel page tables locklessly. It should be
* guaranteed that the caller has exclusive access over the range they are
* operating on - that there should be no concurrent access, for example,
* changing permissions for vmalloc objects.
*/
int walk_kernel_page_table_range_lockless(unsigned long start, unsigned long end,
const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
{
struct mm_walk walk = {
.ops = ops,
.mm = mm,
.mm = &init_mm,
.pgd = pgd,
.private = private,
.no_vma = true
@ -620,16 +642,6 @@ int walk_kernel_page_table_range(unsigned long start, unsigned long end,
if (!check_ops_valid(ops))
return -EINVAL;
/*
* Kernel intermediate page tables are usually not freed, so the mmap
* read lock is sufficient. But there are some exceptions.
* E.g. memory hot-remove. In which case, the mmap lock is insufficient
 * to prevent the intermediate kernel page tables belonging to the
* specified address range from being freed. The caller should take
* other actions to prevent this race.
*/
mmap_assert_locked(mm);
return walk_pgd_range(start, end, &walk);
}