Merge branch 'for-next/mm' into for-next/core
* for-next/mm:
  arm64: map [_text, _stext) virtual address range non-executable+read-only
  arm64: Enable vmalloc-huge with ptdump
  arm64: mm: split linear mapping if BBML2 unsupported on secondary CPUs
  arm64: mm: support large block mapping when rodata=full
  arm64: Enable permission change on arm64 kernel block mappings
  arm64/Kconfig: Remove CONFIG_RODATA_FULL_DEFAULT_ENABLED
  arm64: mm: Rework the 'rodata=' options
  arm64: mm: Represent physical memory with phys_addr_t and resource_size_t
  arm64: mm: Make map_fdt() return mapped pointer
  arm64: mm: Cast start/end markers to char *, not u64
This commit is contained in:
commit
77dfca70ba
@ -6405,8 +6405,9 @@
|
||||
rodata= [KNL,EARLY]
|
||||
on Mark read-only kernel memory as read-only (default).
|
||||
off Leave read-only kernel memory writable for debugging.
|
||||
full Mark read-only kernel memory and aliases as read-only
|
||||
[arm64]
|
||||
noalias Mark read-only kernel memory as read-only but retain
|
||||
writable aliases in the direct map for regions outside
|
||||
of the kernel image. [arm64]
|
||||
|
||||
rockchip.usb_uart
|
||||
[EARLY]
|
||||
|
||||
@ -1700,20 +1700,6 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY
|
||||
When taking an exception from user-space, a sequence of branches
|
||||
or a firmware call overwrites the branch history.
|
||||
|
||||
config RODATA_FULL_DEFAULT_ENABLED
|
||||
bool "Apply r/o permissions of VM areas also to their linear aliases"
|
||||
default y
|
||||
help
|
||||
Apply read-only attributes of VM areas to the linear alias of
|
||||
the backing pages as well. This prevents code or read-only data
|
||||
from being modified (inadvertently or intentionally) via another
|
||||
mapping of the same memory page. This additional enhancement can
|
||||
be turned off at runtime by passing rodata=[off|on] (and turned on
|
||||
with rodata=full if this option is set to 'n')
|
||||
|
||||
This requires the linear region to be mapped down to pages,
|
||||
which may adversely affect performance in some cases.
|
||||
|
||||
config ARM64_SW_TTBR0_PAN
|
||||
bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
|
||||
depends on !KCSAN
|
||||
|
||||
@ -871,6 +871,8 @@ static inline bool system_supports_pmuv3(void)
|
||||
return cpus_have_final_cap(ARM64_HAS_PMUV3);
|
||||
}
|
||||
|
||||
bool cpu_supports_bbml2_noabort(void);
|
||||
|
||||
static inline bool system_supports_bbml2_noabort(void)
|
||||
{
|
||||
return alternative_has_cap_unlikely(ARM64_HAS_BBML2_NOABORT);
|
||||
|
||||
@ -78,6 +78,9 @@ extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
|
||||
pgprot_t prot, bool page_mappings_only);
|
||||
extern void *fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot);
|
||||
extern void mark_linear_text_alias_ro(void);
|
||||
extern int split_kernel_leaf_mapping(unsigned long start, unsigned long end);
|
||||
extern void init_idmap_kpti_bbml2_flag(void);
|
||||
extern void linear_map_maybe_split_to_ptes(void);
|
||||
|
||||
/*
|
||||
* This check is triggered during the early boot before the cpufeature
|
||||
|
||||
@ -371,6 +371,11 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
|
||||
return __pmd(pmd_val(pmd) | PMD_SECT_CONT);
|
||||
}
|
||||
|
||||
static inline pmd_t pmd_mknoncont(pmd_t pmd)
|
||||
{
|
||||
return __pmd(pmd_val(pmd) & ~PMD_SECT_CONT);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
|
||||
static inline int pte_uffd_wp(pte_t pte)
|
||||
{
|
||||
|
||||
@ -7,6 +7,8 @@
|
||||
|
||||
#include <linux/ptdump.h>
|
||||
|
||||
DECLARE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
|
||||
|
||||
#ifdef CONFIG_PTDUMP
|
||||
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
@ -21,7 +21,7 @@ static inline bool arch_parse_debug_rodata(char *arg)
|
||||
if (!arg)
|
||||
return false;
|
||||
|
||||
if (!strcmp(arg, "full")) {
|
||||
if (!strcmp(arg, "on")) {
|
||||
rodata_enabled = rodata_full = true;
|
||||
return true;
|
||||
}
|
||||
@ -31,7 +31,7 @@ static inline bool arch_parse_debug_rodata(char *arg)
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!strcmp(arg, "on")) {
|
||||
if (!strcmp(arg, "noalias")) {
|
||||
rodata_enabled = true;
|
||||
rodata_full = false;
|
||||
return true;
|
||||
|
||||
@ -9,18 +9,13 @@
|
||||
#define arch_vmap_pud_supported arch_vmap_pud_supported
|
||||
static inline bool arch_vmap_pud_supported(pgprot_t prot)
|
||||
{
|
||||
/*
|
||||
* SW table walks can't handle removal of intermediate entries.
|
||||
*/
|
||||
return pud_sect_supported() &&
|
||||
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
|
||||
return pud_sect_supported();
|
||||
}
|
||||
|
||||
#define arch_vmap_pmd_supported arch_vmap_pmd_supported
|
||||
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
|
||||
{
|
||||
/* See arch_vmap_pud_supported() */
|
||||
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
|
||||
return true;
|
||||
}
|
||||
|
||||
#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
|
||||
|
||||
@ -86,6 +86,7 @@
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mmu_context.h>
|
||||
#include <asm/mmu.h>
|
||||
#include <asm/mte.h>
|
||||
#include <asm/hypervisor.h>
|
||||
#include <asm/processor.h>
|
||||
@ -2029,6 +2030,7 @@ static void __init kpti_install_ng_mappings(void)
|
||||
if (arm64_use_ng_mappings)
|
||||
return;
|
||||
|
||||
init_idmap_kpti_bbml2_flag();
|
||||
stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask);
|
||||
}
|
||||
|
||||
@ -2219,7 +2221,7 @@ static bool hvhe_possible(const struct arm64_cpu_capabilities *entry,
|
||||
return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE);
|
||||
}
|
||||
|
||||
static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
|
||||
bool cpu_supports_bbml2_noabort(void)
|
||||
{
|
||||
/*
|
||||
* We want to allow usage of BBML2 in as wide a range of kernel contexts
|
||||
@ -2255,6 +2257,11 @@ static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int sco
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool has_bbml2_noabort(const struct arm64_cpu_capabilities *caps, int scope)
|
||||
{
|
||||
return cpu_supports_bbml2_noabort();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM64_PAN
|
||||
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
|
||||
{
|
||||
@ -3930,6 +3937,7 @@ void __init setup_system_features(void)
|
||||
{
|
||||
setup_system_capabilities();
|
||||
|
||||
linear_map_maybe_split_to_ptes();
|
||||
kpti_install_ng_mappings();
|
||||
|
||||
sve_setup();
|
||||
|
||||
@ -18,9 +18,9 @@
|
||||
|
||||
extern const u8 __eh_frame_start[], __eh_frame_end[];
|
||||
|
||||
extern void idmap_cpu_replace_ttbr1(void *pgdir);
|
||||
extern void idmap_cpu_replace_ttbr1(phys_addr_t pgdir);
|
||||
|
||||
static void __init map_segment(pgd_t *pg_dir, u64 *pgd, u64 va_offset,
|
||||
static void __init map_segment(pgd_t *pg_dir, phys_addr_t *pgd, u64 va_offset,
|
||||
void *start, void *end, pgprot_t prot,
|
||||
bool may_use_cont, int root_level)
|
||||
{
|
||||
@ -40,7 +40,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
|
||||
{
|
||||
bool enable_scs = IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS);
|
||||
bool twopass = IS_ENABLED(CONFIG_RELOCATABLE);
|
||||
u64 pgdp = (u64)init_pg_dir + PAGE_SIZE;
|
||||
phys_addr_t pgdp = (phys_addr_t)init_pg_dir + PAGE_SIZE;
|
||||
pgprot_t text_prot = PAGE_KERNEL_ROX;
|
||||
pgprot_t data_prot = PAGE_KERNEL;
|
||||
pgprot_t prot;
|
||||
@ -78,6 +78,12 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
|
||||
twopass |= enable_scs;
|
||||
prot = twopass ? data_prot : text_prot;
|
||||
|
||||
/*
|
||||
* [_text, _stext) isn't executed after boot and contains some
|
||||
* non-executable, unpredictable data, so map it non-executable.
|
||||
*/
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, _text, _stext, data_prot,
|
||||
false, root_level);
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
|
||||
!twopass, root_level);
|
||||
map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
|
||||
@ -90,7 +96,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
|
||||
true, root_level);
|
||||
dsb(ishst);
|
||||
|
||||
idmap_cpu_replace_ttbr1(init_pg_dir);
|
||||
idmap_cpu_replace_ttbr1((phys_addr_t)init_pg_dir);
|
||||
|
||||
if (twopass) {
|
||||
if (IS_ENABLED(CONFIG_RELOCATABLE))
|
||||
@ -129,10 +135,10 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
|
||||
/* Copy the root page table to its final location */
|
||||
memcpy((void *)swapper_pg_dir + va_offset, init_pg_dir, PAGE_SIZE);
|
||||
dsb(ishst);
|
||||
idmap_cpu_replace_ttbr1(swapper_pg_dir);
|
||||
idmap_cpu_replace_ttbr1((phys_addr_t)swapper_pg_dir);
|
||||
}
|
||||
|
||||
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(u64 ttbr)
|
||||
static void noinline __section(".idmap.text") set_ttbr0_for_lpa2(phys_addr_t ttbr)
|
||||
{
|
||||
u64 sctlr = read_sysreg(sctlr_el1);
|
||||
u64 tcr = read_sysreg(tcr_el1) | TCR_DS;
|
||||
@ -172,30 +178,30 @@ static void __init remap_idmap_for_lpa2(void)
|
||||
*/
|
||||
create_init_idmap(init_pg_dir, mask);
|
||||
dsb(ishst);
|
||||
set_ttbr0_for_lpa2((u64)init_pg_dir);
|
||||
set_ttbr0_for_lpa2((phys_addr_t)init_pg_dir);
|
||||
|
||||
/*
|
||||
* Recreate the initial ID map with the same granularity as before.
|
||||
* Don't bother with the FDT, we no longer need it after this.
|
||||
*/
|
||||
memset(init_idmap_pg_dir, 0,
|
||||
(u64)init_idmap_pg_end - (u64)init_idmap_pg_dir);
|
||||
(char *)init_idmap_pg_end - (char *)init_idmap_pg_dir);
|
||||
|
||||
create_init_idmap(init_idmap_pg_dir, mask);
|
||||
dsb(ishst);
|
||||
|
||||
/* switch back to the updated initial ID map */
|
||||
set_ttbr0_for_lpa2((u64)init_idmap_pg_dir);
|
||||
set_ttbr0_for_lpa2((phys_addr_t)init_idmap_pg_dir);
|
||||
|
||||
/* wipe the temporary ID map from memory */
|
||||
memset(init_pg_dir, 0, (u64)init_pg_end - (u64)init_pg_dir);
|
||||
memset(init_pg_dir, 0, (char *)init_pg_end - (char *)init_pg_dir);
|
||||
}
|
||||
|
||||
static void __init map_fdt(u64 fdt)
|
||||
static void *__init map_fdt(phys_addr_t fdt)
|
||||
{
|
||||
static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
|
||||
u64 efdt = fdt + MAX_FDT_SIZE;
|
||||
u64 ptep = (u64)ptes;
|
||||
phys_addr_t efdt = fdt + MAX_FDT_SIZE;
|
||||
phys_addr_t ptep = (phys_addr_t)ptes; /* We're idmapped when called */
|
||||
|
||||
/*
|
||||
* Map up to MAX_FDT_SIZE bytes, but avoid overlap with
|
||||
@ -205,6 +211,8 @@ static void __init map_fdt(u64 fdt)
|
||||
fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
|
||||
(pte_t *)init_idmap_pg_dir, false, 0);
|
||||
dsb(ishst);
|
||||
|
||||
return (void *)fdt;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -230,7 +238,7 @@ static bool __init ng_mappings_allowed(void)
|
||||
return true;
|
||||
}
|
||||
|
||||
asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
|
||||
asmlinkage void __init early_map_kernel(u64 boot_status, phys_addr_t fdt)
|
||||
{
|
||||
static char const chosen_str[] __initconst = "/chosen";
|
||||
u64 va_base, pa_base = (u64)&_text;
|
||||
@ -238,15 +246,14 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
|
||||
int root_level = 4 - CONFIG_PGTABLE_LEVELS;
|
||||
int va_bits = VA_BITS;
|
||||
int chosen;
|
||||
|
||||
map_fdt((u64)fdt);
|
||||
void *fdt_mapped = map_fdt(fdt);
|
||||
|
||||
/* Clear BSS and the initial page tables */
|
||||
memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
|
||||
memset(__bss_start, 0, (char *)init_pg_end - (char *)__bss_start);
|
||||
|
||||
/* Parse the command line for CPU feature overrides */
|
||||
chosen = fdt_path_offset(fdt, chosen_str);
|
||||
init_feature_override(boot_status, fdt, chosen);
|
||||
chosen = fdt_path_offset(fdt_mapped, chosen_str);
|
||||
init_feature_override(boot_status, fdt_mapped, chosen);
|
||||
|
||||
if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && !cpu_has_lva()) {
|
||||
va_bits = VA_BITS_MIN;
|
||||
@ -266,7 +273,7 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
|
||||
* fill in the high bits from the seed.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
|
||||
u64 kaslr_seed = kaslr_early_init(fdt, chosen);
|
||||
u64 kaslr_seed = kaslr_early_init(fdt_mapped, chosen);
|
||||
|
||||
if (kaslr_seed && kaslr_requires_kpti())
|
||||
arm64_use_ng_mappings = ng_mappings_allowed();
|
||||
|
||||
@ -26,8 +26,9 @@
|
||||
* @va_offset: Offset between a physical page and its current mapping
|
||||
* in the VA space
|
||||
*/
|
||||
void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
int level, pte_t *tbl, bool may_use_cont, u64 va_offset)
|
||||
void __init map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
|
||||
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
|
||||
u64 va_offset)
|
||||
{
|
||||
u64 cmask = (level == 3) ? CONT_PTE_SIZE - 1 : U64_MAX;
|
||||
ptdesc_t protval = pgprot_val(prot) & ~PTE_TYPE_MASK;
|
||||
@ -87,19 +88,22 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
}
|
||||
}
|
||||
|
||||
asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
|
||||
asmlinkage phys_addr_t __init create_init_idmap(pgd_t *pg_dir, ptdesc_t clrmask)
|
||||
{
|
||||
u64 ptep = (u64)pg_dir + PAGE_SIZE;
|
||||
phys_addr_t ptep = (phys_addr_t)pg_dir + PAGE_SIZE; /* MMU is off */
|
||||
pgprot_t text_prot = PAGE_KERNEL_ROX;
|
||||
pgprot_t data_prot = PAGE_KERNEL;
|
||||
|
||||
pgprot_val(text_prot) &= ~clrmask;
|
||||
pgprot_val(data_prot) &= ~clrmask;
|
||||
|
||||
map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
|
||||
text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
|
||||
map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
|
||||
data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
|
||||
/* MMU is off; pointer casts to phys_addr_t are safe */
|
||||
map_range(&ptep, (u64)_stext, (u64)__initdata_begin,
|
||||
(phys_addr_t)_stext, text_prot, IDMAP_ROOT_LEVEL,
|
||||
(pte_t *)pg_dir, false, 0);
|
||||
map_range(&ptep, (u64)__initdata_begin, (u64)_end,
|
||||
(phys_addr_t)__initdata_begin, data_prot, IDMAP_ROOT_LEVEL,
|
||||
(pte_t *)pg_dir, false, 0);
|
||||
|
||||
return ptep;
|
||||
}
|
||||
|
||||
@ -29,9 +29,10 @@ u64 kaslr_early_init(void *fdt, int chosen);
|
||||
void relocate_kernel(u64 offset);
|
||||
int scs_patch(const u8 eh_frame[], int size);
|
||||
|
||||
void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
|
||||
void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
|
||||
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
|
||||
u64 va_offset);
|
||||
|
||||
asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
|
||||
asmlinkage void early_map_kernel(u64 boot_status, phys_addr_t fdt);
|
||||
|
||||
asmlinkage u64 create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
|
||||
asmlinkage phys_addr_t create_init_idmap(pgd_t *pgd, ptdesc_t clrmask);
|
||||
|
||||
@ -214,7 +214,7 @@ static void __init request_standard_resources(void)
|
||||
unsigned long i = 0;
|
||||
size_t res_size;
|
||||
|
||||
kernel_code.start = __pa_symbol(_stext);
|
||||
kernel_code.start = __pa_symbol(_text);
|
||||
kernel_code.end = __pa_symbol(__init_begin - 1);
|
||||
kernel_data.start = __pa_symbol(_sdata);
|
||||
kernel_data.end = __pa_symbol(_end - 1);
|
||||
@ -280,7 +280,7 @@ u64 cpu_logical_map(unsigned int cpu)
|
||||
|
||||
void __init __no_sanitize_address setup_arch(char **cmdline_p)
|
||||
{
|
||||
setup_initial_init_mm(_stext, _etext, _edata, _end);
|
||||
setup_initial_init_mm(_text, _etext, _edata, _end);
|
||||
|
||||
*cmdline_p = boot_command_line;
|
||||
|
||||
|
||||
@ -243,7 +243,7 @@ void __init arm64_memblock_init(void)
|
||||
*/
|
||||
if (memory_limit != PHYS_ADDR_MAX) {
|
||||
memblock_mem_limit_remove_map(memory_limit);
|
||||
memblock_add(__pa_symbol(_text), (u64)(_end - _text));
|
||||
memblock_add(__pa_symbol(_text), (resource_size_t)(_end - _text));
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
|
||||
@ -252,8 +252,8 @@ void __init arm64_memblock_init(void)
|
||||
* initrd to become inaccessible via the linear mapping.
|
||||
* Otherwise, this is a no-op
|
||||
*/
|
||||
u64 base = phys_initrd_start & PAGE_MASK;
|
||||
u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
|
||||
phys_addr_t base = phys_initrd_start & PAGE_MASK;
|
||||
resource_size_t size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
|
||||
|
||||
/*
|
||||
* We can only add back the initrd memory if we don't end up
|
||||
@ -279,7 +279,7 @@ void __init arm64_memblock_init(void)
|
||||
* Register the kernel text, kernel data, initrd, and initial
|
||||
* pagetables with memblock.
|
||||
*/
|
||||
memblock_reserve(__pa_symbol(_stext), _end - _stext);
|
||||
memblock_reserve(__pa_symbol(_text), _end - _text);
|
||||
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
|
||||
/* the generic initrd code expects virtual addresses */
|
||||
initrd_start = __phys_to_virt(phys_initrd_start);
|
||||
|
||||
@ -27,6 +27,8 @@
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/pkeys.h>
|
||||
#include <linux/mm_inline.h>
|
||||
#include <linux/pagewalk.h>
|
||||
#include <linux/stop_machine.h>
|
||||
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/cputype.h>
|
||||
@ -47,6 +49,8 @@
|
||||
#define NO_CONT_MAPPINGS BIT(1)
|
||||
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(arm64_ptdump_lock_key);
|
||||
|
||||
u64 kimage_voffset __ro_after_init;
|
||||
EXPORT_SYMBOL(kimage_voffset);
|
||||
|
||||
@ -474,14 +478,18 @@ void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt,
|
||||
int flags);
|
||||
#endif
|
||||
|
||||
static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
|
||||
#define INVALID_PHYS_ADDR (-1ULL)
|
||||
|
||||
static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm, gfp_t gfp,
|
||||
enum pgtable_type pgtable_type)
|
||||
{
|
||||
/* Page is zeroed by init_clear_pgtable() so don't duplicate effort. */
|
||||
struct ptdesc *ptdesc = pagetable_alloc(GFP_PGTABLE_KERNEL & ~__GFP_ZERO, 0);
|
||||
struct ptdesc *ptdesc = pagetable_alloc(gfp & ~__GFP_ZERO, 0);
|
||||
phys_addr_t pa;
|
||||
|
||||
BUG_ON(!ptdesc);
|
||||
if (!ptdesc)
|
||||
return INVALID_PHYS_ADDR;
|
||||
|
||||
pa = page_to_phys(ptdesc_page(ptdesc));
|
||||
|
||||
switch (pgtable_type) {
|
||||
@ -502,16 +510,392 @@ static phys_addr_t __pgd_pgtable_alloc(struct mm_struct *mm,
|
||||
return pa;
|
||||
}
|
||||
|
||||
static phys_addr_t
|
||||
try_pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type, gfp_t gfp)
|
||||
{
|
||||
return __pgd_pgtable_alloc(&init_mm, gfp, pgtable_type);
|
||||
}
|
||||
|
||||
static phys_addr_t __maybe_unused
|
||||
pgd_pgtable_alloc_init_mm(enum pgtable_type pgtable_type)
|
||||
{
|
||||
return __pgd_pgtable_alloc(&init_mm, pgtable_type);
|
||||
phys_addr_t pa;
|
||||
|
||||
pa = __pgd_pgtable_alloc(&init_mm, GFP_PGTABLE_KERNEL, pgtable_type);
|
||||
BUG_ON(pa == INVALID_PHYS_ADDR);
|
||||
return pa;
|
||||
}
|
||||
|
||||
static phys_addr_t
|
||||
pgd_pgtable_alloc_special_mm(enum pgtable_type pgtable_type)
|
||||
{
|
||||
return __pgd_pgtable_alloc(NULL, pgtable_type);
|
||||
phys_addr_t pa;
|
||||
|
||||
pa = __pgd_pgtable_alloc(NULL, GFP_PGTABLE_KERNEL, pgtable_type);
|
||||
BUG_ON(pa == INVALID_PHYS_ADDR);
|
||||
return pa;
|
||||
}
|
||||
|
||||
static void split_contpte(pte_t *ptep)
|
||||
{
|
||||
int i;
|
||||
|
||||
ptep = PTR_ALIGN_DOWN(ptep, sizeof(*ptep) * CONT_PTES);
|
||||
for (i = 0; i < CONT_PTES; i++, ptep++)
|
||||
__set_pte(ptep, pte_mknoncont(__ptep_get(ptep)));
|
||||
}
|
||||
|
||||
static int split_pmd(pmd_t *pmdp, pmd_t pmd, gfp_t gfp, bool to_cont)
|
||||
{
|
||||
pmdval_t tableprot = PMD_TYPE_TABLE | PMD_TABLE_UXN | PMD_TABLE_AF;
|
||||
unsigned long pfn = pmd_pfn(pmd);
|
||||
pgprot_t prot = pmd_pgprot(pmd);
|
||||
phys_addr_t pte_phys;
|
||||
pte_t *ptep;
|
||||
int i;
|
||||
|
||||
pte_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PTE, gfp);
|
||||
if (pte_phys == INVALID_PHYS_ADDR)
|
||||
return -ENOMEM;
|
||||
ptep = (pte_t *)phys_to_virt(pte_phys);
|
||||
|
||||
if (pgprot_val(prot) & PMD_SECT_PXN)
|
||||
tableprot |= PMD_TABLE_PXN;
|
||||
|
||||
prot = __pgprot((pgprot_val(prot) & ~PTE_TYPE_MASK) | PTE_TYPE_PAGE);
|
||||
prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
|
||||
if (to_cont)
|
||||
prot = __pgprot(pgprot_val(prot) | PTE_CONT);
|
||||
|
||||
for (i = 0; i < PTRS_PER_PTE; i++, ptep++, pfn++)
|
||||
__set_pte(ptep, pfn_pte(pfn, prot));
|
||||
|
||||
/*
|
||||
* Ensure the pte entries are visible to the table walker by the time
|
||||
* the pmd entry that points to the ptes is visible.
|
||||
*/
|
||||
dsb(ishst);
|
||||
__pmd_populate(pmdp, pte_phys, tableprot);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void split_contpmd(pmd_t *pmdp)
|
||||
{
|
||||
int i;
|
||||
|
||||
pmdp = PTR_ALIGN_DOWN(pmdp, sizeof(*pmdp) * CONT_PMDS);
|
||||
for (i = 0; i < CONT_PMDS; i++, pmdp++)
|
||||
set_pmd(pmdp, pmd_mknoncont(pmdp_get(pmdp)));
|
||||
}
|
||||
|
||||
static int split_pud(pud_t *pudp, pud_t pud, gfp_t gfp, bool to_cont)
|
||||
{
|
||||
pudval_t tableprot = PUD_TYPE_TABLE | PUD_TABLE_UXN | PUD_TABLE_AF;
|
||||
unsigned int step = PMD_SIZE >> PAGE_SHIFT;
|
||||
unsigned long pfn = pud_pfn(pud);
|
||||
pgprot_t prot = pud_pgprot(pud);
|
||||
phys_addr_t pmd_phys;
|
||||
pmd_t *pmdp;
|
||||
int i;
|
||||
|
||||
pmd_phys = try_pgd_pgtable_alloc_init_mm(TABLE_PMD, gfp);
|
||||
if (pmd_phys == INVALID_PHYS_ADDR)
|
||||
return -ENOMEM;
|
||||
pmdp = (pmd_t *)phys_to_virt(pmd_phys);
|
||||
|
||||
if (pgprot_val(prot) & PMD_SECT_PXN)
|
||||
tableprot |= PUD_TABLE_PXN;
|
||||
|
||||
prot = __pgprot((pgprot_val(prot) & ~PMD_TYPE_MASK) | PMD_TYPE_SECT);
|
||||
prot = __pgprot(pgprot_val(prot) & ~PTE_CONT);
|
||||
if (to_cont)
|
||||
prot = __pgprot(pgprot_val(prot) | PTE_CONT);
|
||||
|
||||
for (i = 0; i < PTRS_PER_PMD; i++, pmdp++, pfn += step)
|
||||
set_pmd(pmdp, pfn_pmd(pfn, prot));
|
||||
|
||||
/*
|
||||
* Ensure the pmd entries are visible to the table walker by the time
|
||||
* the pud entry that points to the pmds is visible.
|
||||
*/
|
||||
dsb(ishst);
|
||||
__pud_populate(pudp, pmd_phys, tableprot);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int split_kernel_leaf_mapping_locked(unsigned long addr)
|
||||
{
|
||||
pgd_t *pgdp, pgd;
|
||||
p4d_t *p4dp, p4d;
|
||||
pud_t *pudp, pud;
|
||||
pmd_t *pmdp, pmd;
|
||||
pte_t *ptep, pte;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* PGD: If addr is PGD aligned then addr already describes a leaf
|
||||
* boundary. If not present then there is nothing to split.
|
||||
*/
|
||||
if (ALIGN_DOWN(addr, PGDIR_SIZE) == addr)
|
||||
goto out;
|
||||
pgdp = pgd_offset_k(addr);
|
||||
pgd = pgdp_get(pgdp);
|
||||
if (!pgd_present(pgd))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* P4D: If addr is P4D aligned then addr already describes a leaf
|
||||
* boundary. If not present then there is nothing to split.
|
||||
*/
|
||||
if (ALIGN_DOWN(addr, P4D_SIZE) == addr)
|
||||
goto out;
|
||||
p4dp = p4d_offset(pgdp, addr);
|
||||
p4d = p4dp_get(p4dp);
|
||||
if (!p4d_present(p4d))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* PUD: If addr is PUD aligned then addr already describes a leaf
|
||||
* boundary. If not present then there is nothing to split. Otherwise,
|
||||
* if we have a pud leaf, split to contpmd.
|
||||
*/
|
||||
if (ALIGN_DOWN(addr, PUD_SIZE) == addr)
|
||||
goto out;
|
||||
pudp = pud_offset(p4dp, addr);
|
||||
pud = pudp_get(pudp);
|
||||
if (!pud_present(pud))
|
||||
goto out;
|
||||
if (pud_leaf(pud)) {
|
||||
ret = split_pud(pudp, pud, GFP_PGTABLE_KERNEL, true);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* CONTPMD: If addr is CONTPMD aligned then addr already describes a
|
||||
* leaf boundary. If not present then there is nothing to split.
|
||||
* Otherwise, if we have a contpmd leaf, split to pmd.
|
||||
*/
|
||||
if (ALIGN_DOWN(addr, CONT_PMD_SIZE) == addr)
|
||||
goto out;
|
||||
pmdp = pmd_offset(pudp, addr);
|
||||
pmd = pmdp_get(pmdp);
|
||||
if (!pmd_present(pmd))
|
||||
goto out;
|
||||
if (pmd_leaf(pmd)) {
|
||||
if (pmd_cont(pmd))
|
||||
split_contpmd(pmdp);
|
||||
/*
|
||||
* PMD: If addr is PMD aligned then addr already describes a
|
||||
* leaf boundary. Otherwise, split to contpte.
|
||||
*/
|
||||
if (ALIGN_DOWN(addr, PMD_SIZE) == addr)
|
||||
goto out;
|
||||
ret = split_pmd(pmdp, pmd, GFP_PGTABLE_KERNEL, true);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* CONTPTE: If addr is CONTPTE aligned then addr already describes a
|
||||
* leaf boundary. If not present then there is nothing to split.
|
||||
* Otherwise, if we have a contpte leaf, split to pte.
|
||||
*/
|
||||
if (ALIGN_DOWN(addr, CONT_PTE_SIZE) == addr)
|
||||
goto out;
|
||||
ptep = pte_offset_kernel(pmdp, addr);
|
||||
pte = __ptep_get(ptep);
|
||||
if (!pte_present(pte))
|
||||
goto out;
|
||||
if (pte_cont(pte))
|
||||
split_contpte(ptep);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static DEFINE_MUTEX(pgtable_split_lock);
|
||||
|
||||
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* !BBML2_NOABORT systems should not be trying to change permissions on
|
||||
* anything that is not pte-mapped in the first place. Just return early
|
||||
* and let the permission change code raise a warning if not already
|
||||
* pte-mapped.
|
||||
*/
|
||||
if (!system_supports_bbml2_noabort())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Ensure start and end are at least page-aligned since this is the
|
||||
* finest granularity we can split to.
|
||||
*/
|
||||
if (start != PAGE_ALIGN(start) || end != PAGE_ALIGN(end))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&pgtable_split_lock);
|
||||
arch_enter_lazy_mmu_mode();
|
||||
|
||||
/*
|
||||
* split_kernel_leaf_mapping_locked() may sleep, but that is not a
|
||||
* problem for ARM64 since ARM64's lazy MMU implementation allows
|
||||
* sleeping.
|
||||
*
|
||||
* Optimize for the common case of splitting out a single page from a
|
||||
* larger mapping. Here we can just split on the "least aligned" of
|
||||
* start and end and this will guarantee that there must also be a split
|
||||
* on the more aligned address since both addresses must be in the
|
||||
* same contpte block and it must have been split to ptes.
|
||||
*/
|
||||
if (end - start == PAGE_SIZE) {
|
||||
start = __ffs(start) < __ffs(end) ? start : end;
|
||||
ret = split_kernel_leaf_mapping_locked(start);
|
||||
} else {
|
||||
ret = split_kernel_leaf_mapping_locked(start);
|
||||
if (!ret)
|
||||
ret = split_kernel_leaf_mapping_locked(end);
|
||||
}
|
||||
|
||||
arch_leave_lazy_mmu_mode();
|
||||
mutex_unlock(&pgtable_split_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr,
|
||||
unsigned long next,
|
||||
struct mm_walk *walk)
|
||||
{
|
||||
pud_t pud = pudp_get(pudp);
|
||||
int ret = 0;
|
||||
|
||||
if (pud_leaf(pud))
|
||||
ret = split_pud(pudp, pud, GFP_ATOMIC, false);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr,
|
||||
unsigned long next,
|
||||
struct mm_walk *walk)
|
||||
{
|
||||
pmd_t pmd = pmdp_get(pmdp);
|
||||
int ret = 0;
|
||||
|
||||
if (pmd_leaf(pmd)) {
|
||||
if (pmd_cont(pmd))
|
||||
split_contpmd(pmdp);
|
||||
ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false);
|
||||
|
||||
/*
|
||||
* We have split the pmd directly to ptes so there is no need to
|
||||
* visit each pte to check if they are contpte.
|
||||
*/
|
||||
walk->action = ACTION_CONTINUE;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr,
|
||||
unsigned long next,
|
||||
struct mm_walk *walk)
|
||||
{
|
||||
pte_t pte = __ptep_get(ptep);
|
||||
|
||||
if (pte_cont(pte))
|
||||
split_contpte(ptep);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct mm_walk_ops split_to_ptes_ops __initconst = {
|
||||
.pud_entry = split_to_ptes_pud_entry,
|
||||
.pmd_entry = split_to_ptes_pmd_entry,
|
||||
.pte_entry = split_to_ptes_pte_entry,
|
||||
};
|
||||
|
||||
static bool linear_map_requires_bbml2 __initdata;
|
||||
|
||||
u32 idmap_kpti_bbml2_flag;
|
||||
|
||||
void __init init_idmap_kpti_bbml2_flag(void)
|
||||
{
|
||||
WRITE_ONCE(idmap_kpti_bbml2_flag, 1);
|
||||
/* Must be visible to other CPUs before stop_machine() is called. */
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
static int __init linear_map_split_to_ptes(void *__unused)
|
||||
{
|
||||
/*
|
||||
* Repainting the linear map must be done by CPU0 (the boot CPU) because
|
||||
* that's the only CPU that we know supports BBML2. The other CPUs will
|
||||
* be held in a waiting area with the idmap active.
|
||||
*/
|
||||
if (!smp_processor_id()) {
|
||||
unsigned long lstart = _PAGE_OFFSET(vabits_actual);
|
||||
unsigned long lend = PAGE_END;
|
||||
unsigned long kstart = (unsigned long)lm_alias(_stext);
|
||||
unsigned long kend = (unsigned long)lm_alias(__init_begin);
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Wait for all secondary CPUs to be put into the waiting area.
|
||||
*/
|
||||
smp_cond_load_acquire(&idmap_kpti_bbml2_flag, VAL == num_online_cpus());
|
||||
|
||||
/*
|
||||
* Walk all of the linear map [lstart, lend), except the kernel
|
||||
* linear map alias [kstart, kend), and split all mappings to
|
||||
* PTE. The kernel alias remains static throughout runtime so
|
||||
* can continue to be safely mapped with large mappings.
|
||||
*/
|
||||
ret = walk_kernel_page_table_range_lockless(lstart, kstart,
|
||||
&split_to_ptes_ops, NULL, NULL);
|
||||
if (!ret)
|
||||
ret = walk_kernel_page_table_range_lockless(kend, lend,
|
||||
&split_to_ptes_ops, NULL, NULL);
|
||||
if (ret)
|
||||
panic("Failed to split linear map\n");
|
||||
flush_tlb_kernel_range(lstart, lend);
|
||||
|
||||
/*
|
||||
* Relies on dsb in flush_tlb_kernel_range() to avoid reordering
|
||||
* before any page table split operations.
|
||||
*/
|
||||
WRITE_ONCE(idmap_kpti_bbml2_flag, 0);
|
||||
} else {
|
||||
typedef void (wait_split_fn)(void);
|
||||
extern wait_split_fn wait_linear_map_split_to_ptes;
|
||||
wait_split_fn *wait_fn;
|
||||
|
||||
wait_fn = (void *)__pa_symbol(wait_linear_map_split_to_ptes);
|
||||
|
||||
/*
|
||||
* At least one secondary CPU doesn't support BBML2 so cannot
|
||||
* tolerate the size of the live mappings changing. So have the
|
||||
* secondary CPUs wait for the boot CPU to make the changes
|
||||
* with the idmap active and init_mm inactive.
|
||||
*/
|
||||
cpu_install_idmap();
|
||||
wait_fn();
|
||||
cpu_uninstall_idmap();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init linear_map_maybe_split_to_ptes(void)
|
||||
{
|
||||
if (linear_map_requires_bbml2 && !system_supports_bbml2_noabort()) {
|
||||
init_idmap_kpti_bbml2_flag();
|
||||
stop_machine(linear_map_split_to_ptes, NULL, cpu_online_mask);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -574,8 +958,8 @@ void __init mark_linear_text_alias_ro(void)
|
||||
/*
|
||||
* Remove the write permissions from the linear alias of .text/.rodata
|
||||
*/
|
||||
update_mapping_prot(__pa_symbol(_stext), (unsigned long)lm_alias(_stext),
|
||||
(unsigned long)__init_begin - (unsigned long)_stext,
|
||||
update_mapping_prot(__pa_symbol(_text), (unsigned long)lm_alias(_text),
|
||||
(unsigned long)__init_begin - (unsigned long)_text,
|
||||
PAGE_KERNEL_RO);
|
||||
}
|
||||
|
||||
@ -633,10 +1017,20 @@ static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) {
|
||||
|
||||
#endif /* CONFIG_KFENCE */
|
||||
|
||||
static inline bool force_pte_mapping(void)
|
||||
{
|
||||
bool bbml2 = system_capabilities_finalized() ?
|
||||
system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort();
|
||||
|
||||
return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() ||
|
||||
is_realm_world())) ||
|
||||
debug_pagealloc_enabled();
|
||||
}
|
||||
|
||||
static void __init map_mem(pgd_t *pgdp)
|
||||
{
|
||||
static const u64 direct_map_end = _PAGE_END(VA_BITS_MIN);
|
||||
phys_addr_t kernel_start = __pa_symbol(_stext);
|
||||
phys_addr_t kernel_start = __pa_symbol(_text);
|
||||
phys_addr_t kernel_end = __pa_symbol(__init_begin);
|
||||
phys_addr_t start, end;
|
||||
phys_addr_t early_kfence_pool;
|
||||
@ -658,7 +1052,9 @@ static void __init map_mem(pgd_t *pgdp)
|
||||
|
||||
early_kfence_pool = arm64_kfence_alloc_pool();
|
||||
|
||||
if (can_set_direct_map())
|
||||
linear_map_requires_bbml2 = !force_pte_mapping() && can_set_direct_map();
|
||||
|
||||
if (force_pte_mapping())
|
||||
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
|
||||
|
||||
/*
|
||||
@ -683,7 +1079,7 @@ static void __init map_mem(pgd_t *pgdp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Map the linear alias of the [_stext, __init_begin) interval
|
||||
* Map the linear alias of the [_text, __init_begin) interval
|
||||
* as non-executable now, and remove the write permission in
|
||||
* mark_linear_text_alias_ro() below (which will be called after
|
||||
* alternative patching has completed). This makes the contents
|
||||
@ -710,6 +1106,10 @@ void mark_rodata_ro(void)
|
||||
WRITE_ONCE(rodata_is_rw, false);
|
||||
update_mapping_prot(__pa_symbol(__start_rodata), (unsigned long)__start_rodata,
|
||||
section_size, PAGE_KERNEL_RO);
|
||||
/* mark the range between _text and _stext as read only. */
|
||||
update_mapping_prot(__pa_symbol(_text), (unsigned long)_text,
|
||||
(unsigned long)_stext - (unsigned long)_text,
|
||||
PAGE_KERNEL_RO);
|
||||
}
|
||||
|
||||
static void __init declare_vma(struct vm_struct *vma,
|
||||
@ -780,38 +1180,41 @@ static void __init declare_kernel_vmas(void)
|
||||
{
|
||||
static struct vm_struct vmlinux_seg[KERNEL_SEGMENT_COUNT];
|
||||
|
||||
declare_vma(&vmlinux_seg[0], _stext, _etext, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[0], _text, _etext, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[1], __start_rodata, __inittext_begin, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[2], __inittext_begin, __inittext_end, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[3], __initdata_begin, __initdata_end, VM_NO_GUARD);
|
||||
declare_vma(&vmlinux_seg[4], _data, _end, 0);
|
||||
}
|
||||
|
||||
void __pi_map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
|
||||
int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
|
||||
void __pi_map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa,
|
||||
pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,
|
||||
u64 va_offset);
|
||||
|
||||
static u8 idmap_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init,
|
||||
kpti_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
|
||||
kpti_bbml2_ptes[IDMAP_LEVELS - 1][PAGE_SIZE] __aligned(PAGE_SIZE) __ro_after_init;
|
||||
|
||||
static void __init create_idmap(void)
|
||||
{
|
||||
u64 start = __pa_symbol(__idmap_text_start);
|
||||
u64 end = __pa_symbol(__idmap_text_end);
|
||||
u64 ptep = __pa_symbol(idmap_ptes);
|
||||
phys_addr_t start = __pa_symbol(__idmap_text_start);
|
||||
phys_addr_t end = __pa_symbol(__idmap_text_end);
|
||||
phys_addr_t ptep = __pa_symbol(idmap_ptes);
|
||||
|
||||
__pi_map_range(&ptep, start, end, start, PAGE_KERNEL_ROX,
|
||||
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
|
||||
__phys_to_virt(ptep) - ptep);
|
||||
|
||||
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings) {
|
||||
extern u32 __idmap_kpti_flag;
|
||||
u64 pa = __pa_symbol(&__idmap_kpti_flag);
|
||||
if (linear_map_requires_bbml2 ||
|
||||
(IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && !arm64_use_ng_mappings)) {
|
||||
phys_addr_t pa = __pa_symbol(&idmap_kpti_bbml2_flag);
|
||||
|
||||
/*
|
||||
* The KPTI G-to-nG conversion code needs a read-write mapping
|
||||
* of its synchronization flag in the ID map.
|
||||
* of its synchronization flag in the ID map. This is also used
|
||||
* when splitting the linear map to ptes if a secondary CPU
|
||||
* doesn't support bbml2.
|
||||
*/
|
||||
ptep = __pa_symbol(kpti_ptes);
|
||||
ptep = __pa_symbol(kpti_bbml2_ptes);
|
||||
__pi_map_range(&ptep, pa, pa + sizeof(u32), pa, PAGE_KERNEL,
|
||||
IDMAP_ROOT_LEVEL, (pte_t *)idmap_pg_dir, false,
|
||||
__phys_to_virt(ptep) - ptep);
|
||||
@ -1261,7 +1664,8 @@ int pmd_clear_huge(pmd_t *pmdp)
|
||||
return 1;
|
||||
}
|
||||
|
||||
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
|
||||
static int __pmd_free_pte_page(pmd_t *pmdp, unsigned long addr,
|
||||
bool acquire_mmap_lock)
|
||||
{
|
||||
pte_t *table;
|
||||
pmd_t pmd;
|
||||
@ -1273,13 +1677,25 @@ int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* See comment in pud_free_pmd_page for static key logic */
|
||||
table = pte_offset_kernel(pmdp, addr);
|
||||
pmd_clear(pmdp);
|
||||
__flush_tlb_kernel_pgtable(addr);
|
||||
if (static_branch_unlikely(&arm64_ptdump_lock_key) && acquire_mmap_lock) {
|
||||
mmap_read_lock(&init_mm);
|
||||
mmap_read_unlock(&init_mm);
|
||||
}
|
||||
|
||||
pte_free_kernel(NULL, table);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
|
||||
{
|
||||
/* If ptdump is walking the pagetables, acquire init_mm.mmap_lock */
|
||||
return __pmd_free_pte_page(pmdp, addr, /* acquire_mmap_lock = */ true);
|
||||
}
|
||||
|
||||
int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
|
||||
{
|
||||
pmd_t *table;
|
||||
@ -1295,16 +1711,36 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
|
||||
}
|
||||
|
||||
table = pmd_offset(pudp, addr);
|
||||
|
||||
/*
|
||||
* Our objective is to prevent ptdump from reading a PMD table which has
|
||||
* been freed. In this race, if pud_free_pmd_page observes the key on
|
||||
* (which got flipped by ptdump) then the mmap lock sequence here will,
|
||||
* as a result of the mmap write lock/unlock sequence in ptdump, give
|
||||
* us the correct synchronization. If not, this means that ptdump has
|
||||
* not yet started walking the pagetables - the sequence of barriers
|
||||
* issued by __flush_tlb_kernel_pgtable() guarantees that ptdump will
|
||||
* observe an empty PUD.
|
||||
*/
|
||||
pud_clear(pudp);
|
||||
__flush_tlb_kernel_pgtable(addr);
|
||||
if (static_branch_unlikely(&arm64_ptdump_lock_key)) {
|
||||
mmap_read_lock(&init_mm);
|
||||
mmap_read_unlock(&init_mm);
|
||||
}
|
||||
|
||||
pmdp = table;
|
||||
next = addr;
|
||||
end = addr + PUD_SIZE;
|
||||
do {
|
||||
if (pmd_present(pmdp_get(pmdp)))
|
||||
pmd_free_pte_page(pmdp, next);
|
||||
/*
|
||||
* PMD has been isolated, so ptdump won't see it. No
|
||||
* need to acquire init_mm.mmap_lock.
|
||||
*/
|
||||
__pmd_free_pte_page(pmdp, next, /* acquire_mmap_lock = */ false);
|
||||
} while (pmdp++, next += PMD_SIZE, next != end);
|
||||
|
||||
pud_clear(pudp);
|
||||
__flush_tlb_kernel_pgtable(addr);
|
||||
pmd_free(NULL, table);
|
||||
return 1;
|
||||
}
|
||||
@ -1324,8 +1760,8 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
|
||||
struct range arch_get_mappable_range(void)
|
||||
{
|
||||
struct range mhp_range;
|
||||
u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
|
||||
u64 end_linear_pa = __pa(PAGE_END - 1);
|
||||
phys_addr_t start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
|
||||
phys_addr_t end_linear_pa = __pa(PAGE_END - 1);
|
||||
|
||||
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
|
||||
/*
|
||||
@ -1360,7 +1796,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
|
||||
|
||||
VM_BUG_ON(!mhp_range_allowed(start, size, true));
|
||||
|
||||
if (can_set_direct_map())
|
||||
if (force_pte_mapping())
|
||||
flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
|
||||
|
||||
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
|
||||
|
||||
@ -8,6 +8,7 @@
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/pagewalk.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/pgtable-prot.h>
|
||||
@ -20,7 +21,66 @@ struct page_change_data {
|
||||
pgprot_t clear_mask;
|
||||
};
|
||||
|
||||
bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
|
||||
static ptdesc_t set_pageattr_masks(ptdesc_t val, struct mm_walk *walk)
|
||||
{
|
||||
struct page_change_data *masks = walk->private;
|
||||
|
||||
val &= ~(pgprot_val(masks->clear_mask));
|
||||
val |= (pgprot_val(masks->set_mask));
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
{
|
||||
pud_t val = pudp_get(pud);
|
||||
|
||||
if (pud_sect(val)) {
|
||||
if (WARN_ON_ONCE((next - addr) != PUD_SIZE))
|
||||
return -EINVAL;
|
||||
val = __pud(set_pageattr_masks(pud_val(val), walk));
|
||||
set_pud(pud, val);
|
||||
walk->action = ACTION_CONTINUE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
{
|
||||
pmd_t val = pmdp_get(pmd);
|
||||
|
||||
if (pmd_sect(val)) {
|
||||
if (WARN_ON_ONCE((next - addr) != PMD_SIZE))
|
||||
return -EINVAL;
|
||||
val = __pmd(set_pageattr_masks(pmd_val(val), walk));
|
||||
set_pmd(pmd, val);
|
||||
walk->action = ACTION_CONTINUE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
|
||||
unsigned long next, struct mm_walk *walk)
|
||||
{
|
||||
pte_t val = __ptep_get(pte);
|
||||
|
||||
val = __pte(set_pageattr_masks(pte_val(val), walk));
|
||||
__set_pte(pte, val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct mm_walk_ops pageattr_ops = {
|
||||
.pud_entry = pageattr_pud_entry,
|
||||
.pmd_entry = pageattr_pmd_entry,
|
||||
.pte_entry = pageattr_pte_entry,
|
||||
};
|
||||
|
||||
bool rodata_full __ro_after_init = true;
|
||||
|
||||
bool can_set_direct_map(void)
|
||||
{
|
||||
@ -37,23 +97,8 @@ bool can_set_direct_map(void)
|
||||
arm64_kfence_can_set_direct_map() || is_realm_world();
|
||||
}
|
||||
|
||||
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
|
||||
{
|
||||
struct page_change_data *cdata = data;
|
||||
pte_t pte = __ptep_get(ptep);
|
||||
|
||||
pte = clear_pte_bit(pte, cdata->clear_mask);
|
||||
pte = set_pte_bit(pte, cdata->set_mask);
|
||||
|
||||
__set_pte(ptep, pte);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function assumes that the range is mapped with PAGE_SIZE pages.
|
||||
*/
|
||||
static int __change_memory_common(unsigned long start, unsigned long size,
|
||||
pgprot_t set_mask, pgprot_t clear_mask)
|
||||
static int update_range_prot(unsigned long start, unsigned long size,
|
||||
pgprot_t set_mask, pgprot_t clear_mask)
|
||||
{
|
||||
struct page_change_data data;
|
||||
int ret;
|
||||
@ -61,8 +106,30 @@ static int __change_memory_common(unsigned long start, unsigned long size,
|
||||
data.set_mask = set_mask;
|
||||
data.clear_mask = clear_mask;
|
||||
|
||||
ret = apply_to_page_range(&init_mm, start, size, change_page_range,
|
||||
&data);
|
||||
ret = split_kernel_leaf_mapping(start, start + size);
|
||||
if (WARN_ON_ONCE(ret))
|
||||
return ret;
|
||||
|
||||
arch_enter_lazy_mmu_mode();
|
||||
|
||||
/*
|
||||
* The caller must ensure that the range we are operating on does not
|
||||
* partially overlap a block mapping, or a cont mapping. Any such case
|
||||
* must be eliminated by splitting the mapping.
|
||||
*/
|
||||
ret = walk_kernel_page_table_range_lockless(start, start + size,
|
||||
&pageattr_ops, NULL, &data);
|
||||
arch_leave_lazy_mmu_mode();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __change_memory_common(unsigned long start, unsigned long size,
|
||||
pgprot_t set_mask, pgprot_t clear_mask)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = update_range_prot(start, size, set_mask, clear_mask);
|
||||
|
||||
/*
|
||||
* If the memory is being made valid without changing any other bits
|
||||
@ -174,32 +241,26 @@ int set_memory_valid(unsigned long addr, int numpages, int enable)
|
||||
|
||||
int set_direct_map_invalid_noflush(struct page *page)
|
||||
{
|
||||
struct page_change_data data = {
|
||||
.set_mask = __pgprot(0),
|
||||
.clear_mask = __pgprot(PTE_VALID),
|
||||
};
|
||||
pgprot_t clear_mask = __pgprot(PTE_VALID);
|
||||
pgprot_t set_mask = __pgprot(0);
|
||||
|
||||
if (!can_set_direct_map())
|
||||
return 0;
|
||||
|
||||
return apply_to_page_range(&init_mm,
|
||||
(unsigned long)page_address(page),
|
||||
PAGE_SIZE, change_page_range, &data);
|
||||
return update_range_prot((unsigned long)page_address(page),
|
||||
PAGE_SIZE, set_mask, clear_mask);
|
||||
}
|
||||
|
||||
int set_direct_map_default_noflush(struct page *page)
|
||||
{
|
||||
struct page_change_data data = {
|
||||
.set_mask = __pgprot(PTE_VALID | PTE_WRITE),
|
||||
.clear_mask = __pgprot(PTE_RDONLY),
|
||||
};
|
||||
pgprot_t set_mask = __pgprot(PTE_VALID | PTE_WRITE);
|
||||
pgprot_t clear_mask = __pgprot(PTE_RDONLY);
|
||||
|
||||
if (!can_set_direct_map())
|
||||
return 0;
|
||||
|
||||
return apply_to_page_range(&init_mm,
|
||||
(unsigned long)page_address(page),
|
||||
PAGE_SIZE, change_page_range, &data);
|
||||
return update_range_prot((unsigned long)page_address(page),
|
||||
PAGE_SIZE, set_mask, clear_mask);
|
||||
}
|
||||
|
||||
static int __set_memory_enc_dec(unsigned long addr,
|
||||
|
||||
@ -245,10 +245,6 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
|
||||
*
|
||||
* Called exactly once from stop_machine context by each CPU found during boot.
|
||||
*/
|
||||
.pushsection ".data", "aw", %progbits
|
||||
SYM_DATA(__idmap_kpti_flag, .long 1)
|
||||
.popsection
|
||||
|
||||
SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
||||
cpu .req w0
|
||||
temp_pte .req x0
|
||||
@ -273,7 +269,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings)
|
||||
|
||||
mov x5, x3 // preserve temp_pte arg
|
||||
mrs swapper_ttb, ttbr1_el1
|
||||
adr_l flag_ptr, __idmap_kpti_flag
|
||||
adr_l flag_ptr, idmap_kpti_bbml2_flag
|
||||
|
||||
cbnz cpu, __idmap_kpti_secondary
|
||||
|
||||
@ -416,7 +412,25 @@ alternative_else_nop_endif
|
||||
__idmap_kpti_secondary:
|
||||
/* Uninstall swapper before surgery begins */
|
||||
__idmap_cpu_set_reserved_ttbr1 x16, x17
|
||||
b scondary_cpu_wait
|
||||
|
||||
.unreq swapper_ttb
|
||||
.unreq flag_ptr
|
||||
SYM_FUNC_END(idmap_kpti_install_ng_mappings)
|
||||
.popsection
|
||||
#endif
|
||||
|
||||
.pushsection ".idmap.text", "a"
|
||||
SYM_TYPED_FUNC_START(wait_linear_map_split_to_ptes)
|
||||
/* Must be same registers as in idmap_kpti_install_ng_mappings */
|
||||
swapper_ttb .req x3
|
||||
flag_ptr .req x4
|
||||
|
||||
mrs swapper_ttb, ttbr1_el1
|
||||
adr_l flag_ptr, idmap_kpti_bbml2_flag
|
||||
__idmap_cpu_set_reserved_ttbr1 x16, x17
|
||||
|
||||
scondary_cpu_wait:
|
||||
/* Increment the flag to let the boot CPU know we're ready */
|
||||
1: ldxr w16, [flag_ptr]
|
||||
add w16, w16, #1
|
||||
@ -436,9 +450,8 @@ __idmap_kpti_secondary:
|
||||
|
||||
.unreq swapper_ttb
|
||||
.unreq flag_ptr
|
||||
SYM_FUNC_END(idmap_kpti_install_ng_mappings)
|
||||
SYM_FUNC_END(wait_linear_map_split_to_ptes)
|
||||
.popsection
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __cpu_setup
|
||||
|
||||
@ -283,6 +283,13 @@ void note_page_flush(struct ptdump_state *pt_st)
|
||||
note_page(pt_st, 0, -1, pte_val(pte_zero));
|
||||
}
|
||||
|
||||
static void arm64_ptdump_walk_pgd(struct ptdump_state *st, struct mm_struct *mm)
|
||||
{
|
||||
static_branch_inc(&arm64_ptdump_lock_key);
|
||||
ptdump_walk_pgd(st, mm, NULL);
|
||||
static_branch_dec(&arm64_ptdump_lock_key);
|
||||
}
|
||||
|
||||
void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
|
||||
{
|
||||
unsigned long end = ~0UL;
|
||||
@ -311,7 +318,7 @@ void ptdump_walk(struct seq_file *s, struct ptdump_info *info)
|
||||
}
|
||||
};
|
||||
|
||||
ptdump_walk_pgd(&st.ptdump, info->mm, NULL);
|
||||
arm64_ptdump_walk_pgd(&st.ptdump, info->mm);
|
||||
}
|
||||
|
||||
static void __init ptdump_initialize(void)
|
||||
@ -353,7 +360,7 @@ bool ptdump_check_wx(void)
|
||||
}
|
||||
};
|
||||
|
||||
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
|
||||
arm64_ptdump_walk_pgd(&st.ptdump, &init_mm);
|
||||
|
||||
if (st.wx_pages || st.uxn_pages) {
|
||||
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n",
|
||||
|
||||
@ -134,6 +134,9 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
|
||||
int walk_kernel_page_table_range(unsigned long start,
|
||||
unsigned long end, const struct mm_walk_ops *ops,
|
||||
pgd_t *pgd, void *private);
|
||||
int walk_kernel_page_table_range_lockless(unsigned long start,
|
||||
unsigned long end, const struct mm_walk_ops *ops,
|
||||
pgd_t *pgd, void *private);
|
||||
int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end, const struct mm_walk_ops *ops,
|
||||
void *private);
|
||||
|
||||
@ -606,10 +606,32 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
|
||||
int walk_kernel_page_table_range(unsigned long start, unsigned long end,
|
||||
const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
|
||||
{
|
||||
struct mm_struct *mm = &init_mm;
|
||||
/*
|
||||
* Kernel intermediate page tables are usually not freed, so the mmap
|
||||
* read lock is sufficient. But there are some exceptions.
|
||||
* E.g. memory hot-remove. In which case, the mmap lock is insufficient
|
||||
* to prevent the intermediate kernel page tables belonging to the
|
||||
* specified address range from being freed. The caller should take
|
||||
* other actions to prevent this race.
|
||||
*/
|
||||
mmap_assert_locked(&init_mm);
|
||||
|
||||
return walk_kernel_page_table_range_lockless(start, end, ops, pgd,
|
||||
private);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use this function to walk the kernel page tables locklessly. It should be
|
||||
* guaranteed that the caller has exclusive access over the range they are
|
||||
* operating on - that there should be no concurrent access, for example,
|
||||
* changing permissions for vmalloc objects.
|
||||
*/
|
||||
int walk_kernel_page_table_range_lockless(unsigned long start, unsigned long end,
|
||||
const struct mm_walk_ops *ops, pgd_t *pgd, void *private)
|
||||
{
|
||||
struct mm_walk walk = {
|
||||
.ops = ops,
|
||||
.mm = mm,
|
||||
.mm = &init_mm,
|
||||
.pgd = pgd,
|
||||
.private = private,
|
||||
.no_vma = true
|
||||
@ -620,16 +642,6 @@ int walk_kernel_page_table_range(unsigned long start, unsigned long end,
|
||||
if (!check_ops_valid(ops))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Kernel intermediate page tables are usually not freed, so the mmap
|
||||
* read lock is sufficient. But there are some exceptions.
|
||||
* E.g. memory hot-remove. In which case, the mmap lock is insufficient
|
||||
* to prevent the intermediate kernel page tables belonging to the
|
||||
* specified address range from being freed. The caller should take
|
||||
* other actions to prevent this race.
|
||||
*/
|
||||
mmap_assert_locked(mm);
|
||||
|
||||
return walk_pgd_range(start, end, &walk);
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user