FROMGIT: arm64: Work around Cortex-A510 erratum 2454944

Cortex-A510 erratum 2454944 may cause clean cache lines to be
erroneously written back to memory, breaking the assumptions we rely on
for non-coherent DMA. Try to mitigate this by implementing special DMA
ops that do their best to avoid cacheable aliases via a combination of
bounce-buffering and manipulating the linear map directly, to minimise
the chance of DMA-mapped pages being speculated back into caches.

The other main concern is initial entry, where cache lines covering the
kernel image might potentially become affected between being cleaned by
the bootloader and the kernel being called, so perform some additional
maintenance to be safe in that regard too. Cortex-A510 supports S2FWB,
so KVM should be unaffected.

Bug: 223346425
(cherry picked from commit 5bb88dd8ed70973eeb15722710a46d60951c8255
 https://git.gitlab.arm.com/linux-arm/linux-rm.git arm64/2454944)
Change-Id: Iffd38bf97114f7151f01c70750b465fc991c89c8
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Beata Michalska <beata.michalska@arm.com>
commit 02e1387801 (parent abb277d9f1)
Author: Robin Murphy <robin.murphy@arm.com>
Date: 2022-04-25 17:53:55 +01:00
Committed-by: Treehugger Robot
10 changed files with 686 additions and 6 deletions
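Before the per-file hunks, a minimal sketch of the idea (illustrative only, not part of the patch; sketch_map_for_device_write() is a hypothetical wrapper around the set_nc() helper introduced in the dma-mapping.c hunk below):

/*
 * Sketch: map one page for device-write DMA with no Cacheable alias left
 * through which a speculative fill (and later erroneous writeback) could
 * corrupt the device's data.
 */
static dma_addr_t sketch_map_for_device_write(struct device *dev,
					      struct page *page, size_t size)
{
	/* Retarget the page's linear-map pte to Normal-NC and flush caches */
	if (set_nc(page_address(page), PAGE_ALIGN(size)))
		return DMA_MAPPING_ERROR;
	/* Safe now: no cacheable mapping of the page remains */
	return phys_to_dma(dev, page_to_phys(page));
}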
+2
@@ -94,6 +94,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A510 | #2454944 | ARM64_ERRATUM_2454944 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+30
@@ -713,6 +713,36 @@ config ARM64_ERRATUM_2067961
If unsure, say Y.
config ARM64_ERRATUM_2454944
bool "Cortex-A510: 2454944: Unmodified cache line might be written back to memory"
select ARCH_HAS_TEARDOWN_DMA_OPS
default y
help
This option adds the workaround for ARM Cortex-A510 erratum 2454944.
An affected Cortex-A510 core might write unmodified cache lines back to
memory, which breaks the assumptions upon which software coherency
management for non-coherent DMA relies. If a cache line is
speculatively fetched while a non-coherent device is writing directly
to DRAM, and subsequently written back by natural eviction, data
written by the device in the intervening period can be lost.
The workaround is to enforce as far as reasonably possible that all
non-coherent DMA transfers are bounced and/or remapped to minimise
the chance that any Cacheable alias exists through which speculative
cache fills could occur.
This is quite involved and has unavoidable performance impact on
affected systems.
config ARM64_ERRATUM_2454944_DEBUG
bool "Extra debug checks for Cortex-A510 2454944"
depends on ARM64_ERRATUM_2454944
default y
help
Enable additional checks and warnings to detect and mitigate driver
bugs breaking the remapping workaround.
config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
+2 -1
@@ -72,7 +72,8 @@
#define ARM64_WORKAROUND_TSB_FLUSH_FAILURE 61
#define ARM64_SPECTRE_BHB 62
-/* kabi: reserve 63 - 76 for future cpu capabilities */
+/* kabi: reserve 63 - 74 for future cpu capabilities */
+#define ARM64_WORKAROUND_NO_DMA_ALIAS 75
#define ARM64_NCAPS 76
#endif /* __ASM_CPUCAPS_H */
+13
@@ -43,6 +43,19 @@ typedef struct page *pgtable_t;
extern int pfn_valid(unsigned long);
#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG
#include <asm/cpufeature.h>
void page_check_nc(struct page *page, int order);
static inline void arch_free_page(struct page *page, int order)
{
if (cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS))
page_check_nc(page, order);
}
#define HAVE_ARCH_FREE_PAGE
#endif
#include <asm/memory.h>
#endif /* !__ASSEMBLY__ */
+4
@@ -1,4 +1,8 @@
#ifndef _ASM_ARM64_VMALLOC_H
#define _ASM_ARM64_VMALLOC_H
#include <asm/cpufeature.h>
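/*
 * Rationale (assumed): a lazily-freed vmap area keeps its cacheable
 * mapping around until the deferred purge, which could recreate exactly
 * the alias this workaround tries to eliminate, so lazy vunmap is forced
 * off whenever the workaround capability is set.
 */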
#define arch_disable_lazy_vunmap cpus_have_const_cap(ARM64_WORKAROUND_NO_DMA_ALIAS)
#endif /* _ASM_ARM64_VMALLOC_H */
+8
@@ -555,6 +555,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
.capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE,
ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_2454944
{
.desc = "ARM erratum 2454944",
.capability = ARM64_WORKAROUND_NO_DMA_ALIAS,
ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
MIDR_FIXED(MIDR_CPU_VAR_REV(1, 1), BIT(25)),
},
#endif
{
}
+20 -3
@@ -125,9 +125,13 @@ SYM_CODE_END(primary_entry)
SYM_CODE_START_LOCAL(preserve_boot_args)
mov x21, x0 // x21=FDT
-adr_l x0, boot_args // record the contents of
-stp x21, x1, [x0] // x0 .. x3 at kernel entry
-stp x2, x3, [x0, #16]
+adr_l x0, boot_args
+#ifdef CONFIG_ARM64_ERRATUM_2454944
+dc ivac, x0 // Cortex-A510 CWG is 64 bytes, so plenty
+dsb sy
+#endif
+stp x21, x1, [x0] // record the contents of
+stp x2, x3, [x0, #16] // x0 .. x3 at kernel entry
dmb sy // needed before dc ivac with
// MMU off
@@ -282,8 +286,17 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
* the kernel image, and thus are clean to the PoC per the boot
* protocol.
*/
#ifndef CONFIG_ARM64_ERRATUM_2454944
adrp x0, init_pg_dir
adrp x1, init_pg_end
#else
/*
* However if we can't even trust "clean" cache lines shadowing rodata,
* then nuke the entire image. It's the only way to be sure.
*/
adrp x0, _text
adrp x1, _end
#endif
sub x1, x1, x0
bl __inval_dcache_area
@@ -531,6 +544,10 @@ SYM_FUNC_END(init_kernel_el)
*/
SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
adr_l x1, __boot_cpu_mode
#ifdef CONFIG_ARM64_ERRATUM_2454944
dc ivac, x1
dsb sy
#endif
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
add x1, x1, #4
+604
@@ -31,7 +31,599 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
__dma_flush_area(page_address(page), size);
}
#ifdef CONFIG_ARM64_ERRATUM_2454944
#include <linux/dma-direct.h>
#include <linux/slab.h>
#include <linux/swiotlb.h>
#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG
#include <linux/debugfs.h>
static atomic_t pages_remapped;
static int __init remap_debugfs_register(void)
{
debugfs_create_atomic_t("pages_remapped_nc", 0444, NULL, &pages_remapped);
return 0;
}
arch_initcall(remap_debugfs_register);
#endif
/*
* Nobody should be using these software bits on linear map addresses, right?
* This is categorically the worst, but oh well, needs must...
*/
#define REFCOUNT_INC (1UL << 55)
#define PTE_REFCOUNT(pte) (((pte) >> 55) & 0xf)
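/*
 * Bits 55-58 of a pte are software-defined (PTE_DIRTY and friends) and
 * carry no meaning for linear-map entries, so a 4-bit "remapped to
 * Normal-NC" refcount is stashed there: 0 means the normal Cacheable
 * mapping, 1-15 count concurrent DMA mappings of the page.
 */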
static int pte_set_nc(pte_t *ptep, unsigned long addr, void *data)
{
pteval_t old_pte, new_pte, pte;
unsigned int refcount;
pte = pte_val(READ_ONCE(*ptep));
do {
/* Avoid racing against the transient invalid state */
old_pte = pte | PTE_VALID;
new_pte = old_pte + REFCOUNT_INC;
refcount = PTE_REFCOUNT(pte);
if (WARN_ON(refcount == 15))
return -EINVAL;
if (refcount == 0) {
new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID);
new_pte |= PTE_ATTRINDX(MT_NORMAL_NC);
}
pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte);
} while (pte != old_pte);
*(unsigned int *)data = refcount;
if (refcount)
return 0;
#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG
atomic_inc(&pages_remapped);
#endif
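/*
 * First mapping of this page: the cmpxchg above installed the pte with
 * PTE_VALID clear, so complete the break-before-make sequence - flush
 * the old cacheable translation, then make the Normal-NC pte live.
 */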
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID));
return 0;
}
static int pte_clear_nc(pte_t *ptep, unsigned long addr, void *data)
{
pteval_t old_pte, new_pte, pte;
unsigned int refcount;
pte = pte_val(READ_ONCE(*ptep));
do {
old_pte = pte | PTE_VALID;
new_pte = old_pte - REFCOUNT_INC;
refcount = PTE_REFCOUNT(pte);
if (WARN_ON(refcount == 0))
return -EINVAL;
if (refcount == 1) {
new_pte &= ~(PTE_ATTRINDX_MASK | PTE_VALID);
new_pte |= PTE_ATTRINDX(MT_NORMAL_TAGGED);
}
pte = cmpxchg_relaxed(&pte_val(*ptep), old_pte, new_pte);
} while (pte != old_pte);
if (refcount > 1)
return 0;
#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG
atomic_dec(&pages_remapped);
#endif
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
WRITE_ONCE(*ptep, __pte(new_pte | PTE_VALID));
return 0;
}
static int set_nc(void *addr, size_t size)
{
unsigned int count;
int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr,
size, pte_set_nc, &count);
WARN_RATELIMIT(IS_ENABLED(CONFIG_ARM64_ERRATUM_2454944_DEBUG) &&
count == 0 && page_mapped(virt_to_page(addr)),
"changing linear mapping but cacheable aliases may still exist\n");
dsb(ishst);
isb();
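/* Clean+invalidate anything cached via the old Cacheable attribute */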
__flush_dcache_area(addr, size);
return ret;
}
static int clear_nc(void *addr, size_t size)
{
int ret = apply_to_existing_page_range(&init_mm, (unsigned long)addr,
size, pte_clear_nc, NULL);
dsb(ishst);
isb();
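/*
 * Invalidate only: DRAM holds the authoritative data, and any lines
 * covering this range can only be stale speculations.
 */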
__inval_dcache_area(addr, size);
return ret;
}
#ifdef CONFIG_ARM64_ERRATUM_2454944_DEBUG
void page_check_nc(struct page *page, int order)
{
pgd_t *pgdp, pgd;
p4d_t *p4dp, p4d;
pud_t *pudp, pud;
pmd_t *pmdp, pmd;
pte_t *ptep, pte;
unsigned long addr = (unsigned long)page_address(page);
int i, j;
pgdp = pgd_offset(&init_mm, addr);
pgd = READ_ONCE(*pgdp);
p4dp = p4d_offset(pgdp, addr);
p4d = READ_ONCE(*p4dp);
pudp = pud_offset(p4dp, addr);
pud = READ_ONCE(*pudp);
pmdp = pmd_offset(pudp, addr);
pmd = READ_ONCE(*pmdp);
for (i = 0; i < (1 << order); i++) {
ptep = pte_offset_map(pmdp, addr);
pte = READ_ONCE(*ptep);
j = PTE_REFCOUNT(pte_val(pte));
WARN(j, "Non-Cacheable page leaked! I'm fixing it up but it means you have a bug elsewhere");
while (j--)
pte_clear_nc(ptep, addr, NULL);
addr += PAGE_SIZE;
}
}
#endif /* CONFIG_ARM64_ERRATUM_2454944_DEBUG */
static phys_addr_t __arm64_noalias_map(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir,
unsigned long attrs, bool bounce)
{
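/*
 * The remapping granule is a page, so a sub-page or misaligned buffer
 * must be bounced into its own page-aligned SWIOTLB slot rather than
 * dragging unrelated data in the same page to Non-Cacheable.
 */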
bounce = bounce || (phys | size) & ~PAGE_MASK;
if (bounce) {
phys = swiotlb_tbl_map_single(dev, phys, size, PAGE_ALIGN(size),
dir, attrs);
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
}
if (set_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size)))
goto out_unmap;
return phys;
out_unmap:
if (bounce)
swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return DMA_MAPPING_ERROR;
}
static void __arm64_noalias_unmap(struct device *dev, phys_addr_t phys, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
clear_nc(phys_to_virt(phys & PAGE_MASK), PAGE_ALIGN(size));
if (is_swiotlb_buffer(phys))
swiotlb_tbl_unmap_single(dev, phys, size, PAGE_ALIGN(size), dir, attrs);
}
static void __arm64_noalias_sync_for_device(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir)
{
if (is_swiotlb_buffer(phys))
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
else
arch_sync_dma_for_device(phys, size, dir);
}
static void __arm64_noalias_sync_for_cpu(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir)
{
if (is_swiotlb_buffer(phys))
swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
else
arch_sync_dma_for_cpu(phys, size, dir);
}
static void *arm64_noalias_alloc(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs)
{
struct page *page;
void *ret;
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
size = PAGE_ALIGN(size);
page = dma_direct_alloc_pages(dev, size, dma_addr, 0, gfp & ~__GFP_ZERO);
if (!page)
return NULL;
ret = page_address(page);
if (set_nc(ret, size)) {
dma_direct_free_pages(dev, size, page, *dma_addr, 0);
return NULL;
}
return ret;
}
static void arm64_noalias_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
size = PAGE_ALIGN(size);
clear_nc(cpu_addr, size);
dma_direct_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0);
}
static dma_addr_t arm64_noalias_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t phys = page_to_phys(page) + offset;
bool bounce = !dma_capable(dev, phys_to_dma(dev, phys), size, true);
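/*
 * Device-read-only (DMA_TO_DEVICE) transfers only need a clean to PoC:
 * a spurious writeback of a clean line cannot corrupt data the device
 * merely reads, so no bouncing or remapping is required.
 */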
if (!bounce && dir == DMA_TO_DEVICE) {
arch_sync_dma_for_device(phys, size, dir);
return phys_to_dma(dev, phys);
}
bounce = bounce || page_mapped(page);
phys = __arm64_noalias_map(dev, phys, size, dir, attrs, bounce);
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
return phys_to_dma(dev, phys);
}
static void arm64_noalias_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, enum dma_data_direction dir,
unsigned long attrs)
{
if (dir == DMA_TO_DEVICE)
return;
__arm64_noalias_unmap(dev, dma_to_phys(dev, dma_addr), size, dir, attrs);
}
static void arm64_noalias_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *sg;
int i;
if (dir == DMA_TO_DEVICE)
return;
for_each_sg(sgl, sg, nents, i)
__arm64_noalias_unmap(dev, dma_to_phys(dev, sg->dma_address),
sg->length, dir, attrs);
}
static int arm64_noalias_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
int i;
struct scatterlist *sg;
for_each_sg(sgl, sg, nents, i) {
sg->dma_address = arm64_noalias_map_page(dev, sg_page(sg), sg->offset,
sg->length, dir, attrs);
if (sg->dma_address == DMA_MAPPING_ERROR)
goto out_unmap;
sg->dma_length = sg->length;
}
return nents;
out_unmap:
arm64_noalias_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
return 0;
}
static void arm64_noalias_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
__arm64_noalias_sync_for_device(dev, dma_to_phys(dev, addr), size, dir);
}
static void arm64_noalias_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
__arm64_noalias_sync_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
}
static void arm64_noalias_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i)
arm64_noalias_sync_single_for_device(dev, sg->dma_address, sg->length, dir);
}
static void arm64_noalias_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
for_each_sg(sgl, sg, nents, i)
arm64_noalias_sync_single_for_cpu(dev, sg->dma_address, sg->length, dir);
}
static const struct dma_map_ops arm64_noalias_ops = {
.alloc = arm64_noalias_alloc,
.free = arm64_noalias_free,
.alloc_pages = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
.mmap = dma_common_mmap,
.get_sgtable = dma_common_get_sgtable,
.map_page = arm64_noalias_map_page,
.unmap_page = arm64_noalias_unmap_page,
.map_sg = arm64_noalias_map_sg,
.unmap_sg = arm64_noalias_unmap_sg,
.sync_single_for_cpu = arm64_noalias_sync_single_for_cpu,
.sync_single_for_device = arm64_noalias_sync_single_for_device,
.sync_sg_for_cpu = arm64_noalias_sync_sg_for_cpu,
.sync_sg_for_device = arm64_noalias_sync_sg_for_device,
.dma_supported = dma_direct_supported,
.get_required_mask = dma_direct_get_required_mask,
.max_mapping_size = swiotlb_max_mapping_size,
};
#ifdef CONFIG_IOMMU_DMA
static const struct dma_map_ops *iommu_dma_ops;
static void *arm64_iommu_alloc(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t gfp, unsigned long attrs)
{
struct page **pages;
void *ret;
int i;
size = PAGE_ALIGN(size);
if (!gfpflags_allow_blocking(gfp) || (attrs & DMA_ATTR_FORCE_CONTIGUOUS)) {
ret = dma_common_alloc_pages(dev, size, dma_addr, 0, gfp);
return ret ? page_address(ret) : NULL;
}
ret = iommu_dma_ops->alloc(dev, size, dma_addr, gfp, attrs);
if (ret) {
pages = dma_common_find_pages(ret);
for (i = 0; i < size / PAGE_SIZE; i++)
if (set_nc(page_address(pages[i]), PAGE_SIZE))
goto err;
}
return ret;
err:
while (i--)
clear_nc(page_address(pages[i]), PAGE_SIZE);
iommu_dma_ops->free(dev, size, ret, *dma_addr, attrs);
return NULL;
}
static void arm64_iommu_free(struct device *dev, size_t size, void *cpu_addr,
dma_addr_t dma_addr, unsigned long attrs)
{
struct page **pages = dma_common_find_pages(cpu_addr);
int i;
size = PAGE_ALIGN(size);
if (!pages)
return dma_common_free_pages(dev, size, virt_to_page(cpu_addr), dma_addr, 0);
for (i = 0; i < size / PAGE_SIZE; i++)
clear_nc(page_address(pages[i]), PAGE_SIZE);
iommu_dma_ops->free(dev, size, cpu_addr, dma_addr, attrs);
}
static dma_addr_t arm64_iommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t ret;
if (dir == DMA_TO_DEVICE)
return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs);
phys = __arm64_noalias_map(dev, phys, size, dir, attrs, page_mapped(page));
if (phys == DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
attrs |= DMA_ATTR_SKIP_CPU_SYNC;
ret = iommu_dma_ops->map_page(dev, phys_to_page(phys), offset_in_page(phys),
size, dir, attrs);
if (ret == DMA_MAPPING_ERROR)
__arm64_noalias_unmap(dev, phys, size, dir, attrs);
return ret;
}
static void arm64_iommu_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
phys_addr_t phys;
if (dir == DMA_TO_DEVICE)
return iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs);
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr);
iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
__arm64_noalias_unmap(dev, phys, size, dir, attrs);
}
static int arm64_iommu_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
int i, ret;
struct scatterlist *sg;
phys_addr_t *orig_phys;
if (dir == DMA_TO_DEVICE)
return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs);
orig_phys = kmalloc_array(nents, sizeof(*orig_phys), GFP_ATOMIC);
if (!orig_phys)
return 0;
for_each_sg(sgl, sg, nents, i) {
phys_addr_t phys = sg_phys(sg);
/*
* Note we do not have the page_mapped() check here, since
* bouncing plays complete havoc with dma-buf imports. Those
* may well be mapped in userspace, but we hope and pray that
* it's via dma_mmap_attrs() so any such mappings are safely
* non-cacheable. DO NOT allow a block device or other similar
* scatterlist user to get here (disable IOMMUs if necessary),
* since we can't mitigate for both conflicting use-cases.
*/
phys = __arm64_noalias_map(dev, phys, sg->length, dir, attrs, false);
if (phys == DMA_MAPPING_ERROR)
goto out_unmap;
orig_phys[i] = sg_phys(sg);
sg_assign_page(sg, phys_to_page(phys));
sg->offset = offset_in_page(phys);
}
ret = iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
if (ret <= 0)
goto out_unmap;
for_each_sg(sgl, sg, nents, i) {
sg_assign_page(sg, phys_to_page(orig_phys[i]));
sg->offset = offset_in_page(orig_phys[i]);
}
kfree(orig_phys);
return ret;
out_unmap:
for_each_sg(sgl, sg, nents, i) {
__arm64_noalias_unmap(dev, sg_phys(sg), sg->length, dir, attrs);
sg_assign_page(sg, phys_to_page(orig_phys[i]));
sg->offset = offset_in_page(orig_phys[i]);
}
kfree(orig_phys);
return 0;
}
static void arm64_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
struct iommu_domain *domain;
struct scatterlist *sg, *tmp;
dma_addr_t iova;
int i;
if (dir == DMA_TO_DEVICE)
return iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs);
domain = iommu_get_dma_domain(dev);
iova = sgl->dma_address;
tmp = sgl;
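/*
 * Walk physical segments by IOVA: after mapping, dma_address/dma_length
 * are only valid on the (possibly concatenated) DMA segments, so step
 * through the contiguous IOVA range to recover each original segment's
 * physical address.
 */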
for_each_sg(sgl, sg, nents, i) {
phys_addr_t phys = iommu_iova_to_phys(domain, iova);
__arm64_noalias_unmap(dev, phys, sg->length, dir, attrs);
iova += sg->length;
if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) {
tmp = sg_next(tmp);
iova = tmp->dma_address;
}
}
iommu_dma_ops->unmap_sg(dev, sgl, nents, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
}
static void arm64_iommu_sync_single_for_device(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr);
__arm64_noalias_sync_for_device(dev, phys, size, dir);
}
static void arm64_iommu_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
size_t size, enum dma_data_direction dir)
{
phys_addr_t phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), addr);
__arm64_noalias_sync_for_cpu(dev, phys, size, dir);
}
static void arm64_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct scatterlist *sg, *tmp = sgl;
dma_addr_t iova = sgl->dma_address;
int i;
for_each_sg(sgl, sg, nents, i) {
phys_addr_t phys = iommu_iova_to_phys(domain, iova);
__arm64_noalias_sync_for_device(dev, phys, sg->length, dir);
iova += sg->length;
if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) {
tmp = sg_next(tmp);
iova = tmp->dma_address;
}
}
}
static void arm64_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct scatterlist *sg, *tmp = sgl;
dma_addr_t iova = sgl->dma_address;
int i;
for_each_sg(sgl, sg, nents, i) {
phys_addr_t phys = iommu_iova_to_phys(domain, iova);
__arm64_noalias_sync_for_cpu(dev, phys, sg->length, dir);
iova += sg->length;
if (iova == tmp->dma_address + tmp->dma_length && !sg_is_last(tmp)) {
tmp = sg_next(tmp);
iova = tmp->dma_address;
}
}
}
static struct dma_map_ops arm64_iommu_ops = {
.alloc = arm64_iommu_alloc,
.free = arm64_iommu_free,
.alloc_pages = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
.map_page = arm64_iommu_map_page,
.unmap_page = arm64_iommu_unmap_page,
.map_sg = arm64_iommu_map_sg,
.unmap_sg = arm64_iommu_unmap_sg,
.sync_single_for_cpu = arm64_iommu_sync_single_for_cpu,
.sync_single_for_device = arm64_iommu_sync_single_for_device,
.sync_sg_for_cpu = arm64_iommu_sync_sg_for_cpu,
.sync_sg_for_device = arm64_iommu_sync_sg_for_device,
};
static void arm64_init_iommu_ops(struct device *dev)
{
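/*
 * Snapshot the stock IOMMU DMA ops the first time we see them and
 * splice in our wrappers; callbacks that need no interception are
 * delegated to the originals directly.
 */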
const struct dma_map_ops *ops = dev->dma_ops;
dev->dma_ops = &arm64_iommu_ops;
if (iommu_dma_ops)
return;
iommu_dma_ops = ops;
arm64_iommu_ops.mmap = ops->mmap;
arm64_iommu_ops.get_sgtable = ops->get_sgtable;
arm64_iommu_ops.map_resource = ops->map_resource;
arm64_iommu_ops.unmap_resource = ops->unmap_resource;
arm64_iommu_ops.get_merge_boundary = ops->get_merge_boundary;
}
#endif /* CONFIG_IOMMU_DMA */
#endif /* CONFIG_ARM64_ERRATUM_2454944 */
#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS
void arch_teardown_dma_ops(struct device *dev)
{
dev->dma_ops = NULL;
@@ -42,6 +634,14 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{
int cls = cache_line_size_of_cpu();
#ifdef CONFIG_ARM64_ERRATUM_2454944
bool noalias = !coherent && cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS);
if (noalias) {
dev_info(dev, "applying no-alias DMA workaround\n");
dev->dma_ops = &arm64_noalias_ops;
}
#endif
WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN,
TAINT_CPU_OUT_OF_SPEC,
@@ -55,6 +655,10 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
trace_android_vh_iommu_setup_dma_ops(dev, dma_base, size);
trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size);
}
#if defined(CONFIG_ARM64_ERRATUM_2454944) && defined(CONFIG_IOMMU_DMA)
if (noalias && dev->dma_ops != &arm64_noalias_ops)
arm64_init_iommu_ops(dev);
#endif
#ifdef CONFIG_XEN
if (xen_initial_domain())
+2 -1
@@ -581,7 +581,8 @@ static void __init free_unused_memmap(void)
void __init mem_init(void)
{
if (swiotlb_force == SWIOTLB_FORCE ||
-max_pfn > PFN_DOWN(arm64_dma_phys_limit))
+max_pfn > PFN_DOWN(arm64_dma_phys_limit) ||
+cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS))
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;
+1 -1
@@ -502,7 +502,7 @@ static void __init map_mem(pgd_t *pgdp)
u64 i;
if (rodata_full || debug_pagealloc_enabled() ||
-IS_ENABLED(CONFIG_KFENCE))
+IS_ENABLED(CONFIG_KFENCE) || cpus_have_cap(ARM64_WORKAROUND_NO_DMA_ALIAS))
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*