From d61927d784e25c0ce5ab6015538e6a82f152c24e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 4 Dec 2024 16:13:39 -0400 Subject: [PATCH 1/8] iommufd/selftest: Remove domain_alloc_paging() Since this implements domain_alloc_paging_flags() it only needs one op. Fold mock_domain_alloc_paging() into mock_domain_alloc_paging_flags(). Link: https://patch.msgid.link/r/0-v1-8a3e7e21ff6a+1745d-iommufd_paging_flags_jgg@nvidia.com Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/selftest.c | 43 ++++++++++++-------------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index a0de6d6d4e68..6512c1d16348 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -311,25 +311,6 @@ static const struct iommu_dirty_ops dirty_ops = { .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; -static struct iommu_domain *mock_domain_alloc_paging(struct device *dev) -{ - struct mock_dev *mdev = to_mock_dev(dev); - struct mock_iommu_domain *mock; - - mock = kzalloc(sizeof(*mock), GFP_KERNEL); - if (!mock) - return NULL; - mock->domain.geometry.aperture_start = MOCK_APERTURE_START; - mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; - mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; - if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA) - mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE; - mock->domain.ops = mock_ops.default_domain_ops; - mock->domain.type = IOMMU_DOMAIN_UNMANAGED; - xa_init(&mock->pfns); - return &mock->domain; -} - static struct mock_iommu_domain_nested * __mock_domain_alloc_nested(const struct iommu_user_data *user_data) { @@ -385,21 +366,30 @@ mock_domain_alloc_paging_flags(struct device *dev, u32 flags, bool has_dirty_flag = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_NEST_PARENT; - bool no_dirty_ops = to_mock_dev(dev)->flags & - MOCK_FLAGS_DEVICE_NO_DIRTY; - struct iommu_domain *domain; + struct mock_dev *mdev = to_mock_dev(dev); + bool no_dirty_ops = mdev->flags & MOCK_FLAGS_DEVICE_NO_DIRTY; + struct mock_iommu_domain *mock; if (user_data) return ERR_PTR(-EOPNOTSUPP); if ((flags & ~PAGING_FLAGS) || (has_dirty_flag && no_dirty_ops)) return ERR_PTR(-EOPNOTSUPP); - domain = mock_domain_alloc_paging(dev); - if (!domain) + mock = kzalloc(sizeof(*mock), GFP_KERNEL); + if (!mock) return ERR_PTR(-ENOMEM); + mock->domain.geometry.aperture_start = MOCK_APERTURE_START; + mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; + mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + if (dev && mdev->flags & MOCK_FLAGS_DEVICE_HUGE_IOVA) + mock->domain.pgsize_bitmap |= MOCK_HUGE_PAGE_SIZE; + mock->domain.ops = mock_ops.default_domain_ops; + mock->domain.type = IOMMU_DOMAIN_UNMANAGED; + xa_init(&mock->pfns); + if (has_dirty_flag) - domain->dirty_ops = &dirty_ops; - return domain; + mock->domain.dirty_ops = &dirty_ops; + return &mock->domain; } static void mock_domain_free(struct iommu_domain *domain) @@ -713,7 +703,6 @@ static const struct iommu_ops mock_ops = { .owner = THIS_MODULE, .pgsize_bitmap = MOCK_IO_PAGE_SIZE, .hw_info = mock_domain_hw_info, - .domain_alloc_paging = mock_domain_alloc_paging, .domain_alloc_paging_flags = mock_domain_alloc_paging_flags, .domain_alloc_nested = mock_domain_alloc_nested, .capable = mock_domain_capable, From 11534b4de2a1bcc438ed90d031184c9c847e8560 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Sat, 7 Dec 2024 04:01:08 -0800 Subject: [PATCH 2/8] iommufd: Deal with IOMMU_HWPT_FAULT_ID_VALID in iommufd core IOMMU_HWPT_FAULT_ID_VALID is used to mark if the fault_id field of iommu_hwp_alloc is valid or not. As the fault_id field is handled in the iommufd core, so it makes sense to sanitize the IOMMU_HWPT_FAULT_ID_VALID flag in the iommufd core, and mask it out before passing the user flags to the iommu drivers. Link: https://patch.msgid.link/r/20241207120108.5640-1-yi.l.liu@intel.com Signed-off-by: Yi Liu Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c | 8 +------- drivers/iommu/intel/iommu.c | 3 +-- drivers/iommu/iommufd/hw_pagetable.c | 10 +++++++--- drivers/iommu/iommufd/selftest.c | 2 +- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c index c7cc613050d9..5aa2e7af58b4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c @@ -178,18 +178,12 @@ arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, const struct iommu_user_data *user_data) { struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core); - const u32 SUPPORTED_FLAGS = IOMMU_HWPT_FAULT_ID_VALID; struct arm_smmu_nested_domain *nested_domain; struct iommu_hwpt_arm_smmuv3 arg; bool enable_ats = false; int ret; - /* - * Faults delivered to the nested domain are faults that originated by - * the S1 in the domain. The core code will match all PASIDs when - * delivering the fault due to user_pasid_table - */ - if (flags & ~SUPPORTED_FLAGS) + if (flags) return ERR_PTR(-EOPNOTSUPP); ret = iommu_copy_struct_from_user(&arg, user_data, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7d0acb74d5a5..c8f9c70a04ab 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3340,8 +3340,7 @@ intel_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags, bool first_stage; if (flags & - (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING - | IOMMU_HWPT_FAULT_ID_VALID))) + (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING))) return ERR_PTR(-EOPNOTSUPP); if (nested_parent && !nested_supported(iommu)) return ERR_PTR(-EOPNOTSUPP); diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index ce03c3804651..598be26a14e2 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -140,8 +140,8 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, hwpt_paging->nest_parent = flags & IOMMU_HWPT_ALLOC_NEST_PARENT; if (ops->domain_alloc_paging_flags) { - hwpt->domain = ops->domain_alloc_paging_flags(idev->dev, flags, - user_data); + hwpt->domain = ops->domain_alloc_paging_flags(idev->dev, + flags & ~IOMMU_HWPT_FAULT_ID_VALID, user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); hwpt->domain = NULL; @@ -280,6 +280,8 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, struct iommufd_hw_pagetable *hwpt; int rc; + if (flags & ~IOMMU_HWPT_FAULT_ID_VALID) + return ERR_PTR(-EOPNOTSUPP); if (!user_data->len) return ERR_PTR(-EOPNOTSUPP); if (!viommu->ops || !viommu->ops->alloc_domain_nested) @@ -296,7 +298,9 @@ iommufd_viommu_alloc_hwpt_nested(struct iommufd_viommu *viommu, u32 flags, hwpt_nested->parent = viommu->hwpt; hwpt->domain = - viommu->ops->alloc_domain_nested(viommu, flags, user_data); + viommu->ops->alloc_domain_nested(viommu, + flags & ~IOMMU_HWPT_FAULT_ID_VALID, + user_data); if (IS_ERR(hwpt->domain)) { rc = PTR_ERR(hwpt->domain); hwpt->domain = NULL; diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index 6512c1d16348..d40deb0a4f06 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -585,7 +585,7 @@ mock_viommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags, struct mock_viommu *mock_viommu = to_mock_viommu(viommu); struct mock_iommu_domain_nested *mock_nested; - if (flags & ~IOMMU_HWPT_FAULT_ID_VALID) + if (flags) return ERR_PTR(-EOPNOTSUPP); mock_nested = __mock_domain_alloc_nested(user_data); From d9df72c6acd683adf6dd23c061f3a414ec00b1f8 Mon Sep 17 00:00:00 2001 From: Suraj Sonawane Date: Sun, 24 Nov 2024 01:29:00 +0530 Subject: [PATCH 3/8] iommu: iommufd: fix WARNING in iommufd_device_unbind Fix an issue detected by syzbot: WARNING in iommufd_device_unbind iommufd: Time out waiting for iommufd object to become free Resolve a warning in iommufd_device_unbind caused by a timeout while waiting for the shortterm_users reference count to reach zero. The existing 10-second timeout is insufficient in some scenarios, resulting in failures the above warning. Increase the timeout in iommufd_object_dec_wait_shortterm from 10 seconds to 60 seconds to allow sufficient time for the reference count to drop to zero. This change prevents premature timeouts and reduces the likelihood of warnings during iommufd_device_unbind. Fixes: 6f9c4d8c468c ("iommufd: Do not UAF during iommufd_put_object()") Link: https://patch.msgid.link/r/20241123195900.3176-1-surajsonawane0215@gmail.com Reported-by: syzbot+c92878e123785b1fa2db@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=c92878e123785b1fa2db Tested-by: syzbot+c92878e123785b1fa2db@syzkaller.appspotmail.com Signed-off-by: Suraj Sonawane Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 97c5e3567d33..d898d05be690 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -104,7 +104,7 @@ static int iommufd_object_dec_wait_shortterm(struct iommufd_ctx *ictx, if (wait_event_timeout(ictx->destroy_wait, refcount_read(&to_destroy->shortterm_users) == 0, - msecs_to_jiffies(10000))) + msecs_to_jiffies(60000))) return 0; pr_crit("Time out waiting for iommufd object to become free\n"); From e24c1551059268b37f6f40639883eafb281b8b9c Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 13 Jan 2025 22:38:20 +0000 Subject: [PATCH 4/8] iommufd/iova_bitmap: Fix shift-out-of-bounds in iova_bitmap_offset_to_index() Resolve a UBSAN shift-out-of-bounds issue in iova_bitmap_offset_to_index() where shifting the constant "1" (of type int) by bitmap->mapped.pgshift (an unsigned long value) could result in undefined behavior. The constant "1" defaults to a 32-bit "int", and when "pgshift" exceeds 31 (e.g., pgshift = 63) the shift operation overflows, as the result cannot be represented in a 32-bit type. To resolve this, the constant is updated to "1UL", promoting it to an unsigned long type to match the operand's type. Fixes: 58ccf0190d19 ("vfio: Add an IOVA bitmap support") Link: https://patch.msgid.link/r/20250113223820.10713-1-qasdev00@gmail.com Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=85992ace37d5b7b51635 Signed-off-by: Qasim Ijaz Reviewed-by: Joao Martins Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/iova_bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iommufd/iova_bitmap.c b/drivers/iommu/iommufd/iova_bitmap.c index ab665cf38ef4..39a86a4a1d3a 100644 --- a/drivers/iommu/iommufd/iova_bitmap.c +++ b/drivers/iommu/iommufd/iova_bitmap.c @@ -130,7 +130,7 @@ struct iova_bitmap { static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, unsigned long iova) { - unsigned long pgsize = 1 << bitmap->mapped.pgshift; + unsigned long pgsize = 1UL << bitmap->mapped.pgshift; return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); } From 442003f3a842dc374b1c706187778c3d57b84c23 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 7 Jan 2025 09:10:04 -0800 Subject: [PATCH 5/8] iommufd: Keep OBJ/IOCTL lists in an alphabetical order Reorder the existing OBJ/IOCTL lists. Also run clang-format for the same coding style at line wrappings. No functional change. Link: https://patch.msgid.link/r/c5e6d6e0a0bb7abc92ad26937fde19c9426bee96.1736237481.git.nicolinc@nvidia.com Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/main.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index d898d05be690..ccf616462a1c 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -307,9 +307,9 @@ union ucmd_buffer { struct iommu_ioas_map map; struct iommu_ioas_unmap unmap; struct iommu_option option; + struct iommu_vdevice_alloc vdev; struct iommu_vfio_ioas vfio_ioas; struct iommu_viommu_alloc viommu; - struct iommu_vdevice_alloc vdev; #ifdef CONFIG_IOMMUFD_TEST struct iommu_test_cmd test; #endif @@ -333,8 +333,8 @@ struct iommufd_ioctl_op { } static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), - IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc, - out_fault_fd), + IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, + struct iommu_fault_alloc, out_fault_fd), IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, @@ -355,20 +355,18 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { src_iova), IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges, struct iommu_ioas_iova_ranges, out_iova_alignment), - IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, - iova), + IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, iova), IOCTL_OP(IOMMU_IOAS_MAP_FILE, iommufd_ioas_map_file, struct iommu_ioas_map_file, iova), IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap, length), - IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, - val64), + IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, val64), + IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, + struct iommu_vdevice_alloc, virt_id), IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, __reserved), IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl, struct iommu_viommu_alloc, out_viommu_id), - IOCTL_OP(IOMMU_VDEVICE_ALLOC, iommufd_vdevice_alloc_ioctl, - struct iommu_vdevice_alloc, virt_id), #ifdef CONFIG_IOMMUFD_TEST IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), #endif @@ -490,8 +488,8 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { [IOMMUFD_OBJ_DEVICE] = { .destroy = iommufd_device_destroy, }, - [IOMMUFD_OBJ_IOAS] = { - .destroy = iommufd_ioas_destroy, + [IOMMUFD_OBJ_FAULT] = { + .destroy = iommufd_fault_destroy, }, [IOMMUFD_OBJ_HWPT_PAGING] = { .destroy = iommufd_hwpt_paging_destroy, @@ -501,15 +499,15 @@ static const struct iommufd_object_ops iommufd_object_ops[] = { .destroy = iommufd_hwpt_nested_destroy, .abort = iommufd_hwpt_nested_abort, }, - [IOMMUFD_OBJ_FAULT] = { - .destroy = iommufd_fault_destroy, - }, - [IOMMUFD_OBJ_VIOMMU] = { - .destroy = iommufd_viommu_destroy, + [IOMMUFD_OBJ_IOAS] = { + .destroy = iommufd_ioas_destroy, }, [IOMMUFD_OBJ_VDEVICE] = { .destroy = iommufd_vdevice_destroy, }, + [IOMMUFD_OBJ_VIOMMU] = { + .destroy = iommufd_viommu_destroy, + }, #ifdef CONFIG_IOMMUFD_TEST [IOMMUFD_OBJ_SELFTEST] = { .destroy = iommufd_selftest_destroy, From 3f4818ec139030f425476bf8a10b616bab53a7b5 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 14 Jan 2025 22:55:59 -0800 Subject: [PATCH 6/8] iommufd/fault: Destroy response and mutex in iommufd_fault_destroy() Both were missing in the initial patch. Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object") Link: https://patch.msgid.link/r/bc8bb13e215af27e62ee51bdba3648dd4ed2dce3.1736923732.git.nicolinc@nvidia.com Cc: stable@vger.kernel.org Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 1fe804e28a86..685510224d05 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -213,6 +213,7 @@ void iommufd_fault_destroy(struct iommufd_object *obj) { struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj); struct iopf_group *group, *next; + unsigned long index; /* * The iommufd object's reference count is zero at this point. @@ -225,6 +226,13 @@ void iommufd_fault_destroy(struct iommufd_object *obj) iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); iopf_free_group(group); } + xa_for_each(&fault->response, index, group) { + xa_erase(&fault->response, index); + iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); + iopf_free_group(group); + } + xa_destroy(&fault->response); + mutex_destroy(&fault->mutex); } static void iommufd_compose_fault_message(struct iommu_fault *fault, From 3d49020a327cd7d069059317c11df24e407ccfa3 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Fri, 17 Jan 2025 11:29:01 -0800 Subject: [PATCH 7/8] iommufd/fault: Use a separate spinlock to protect fault->deliver list The fault->mutex serializes the fault read()/write() fops and the iommufd_fault_auto_response_faults(), mainly for fault->response. Also, it was conveniently used to fence the fault->deliver in poll() fop and iommufd_fault_iopf_handler(). However, copy_from/to_user() may sleep if pagefaults are enabled. Thus, they could take a long time to wait for user pages to swap in, blocking iommufd_fault_iopf_handler() and its caller that is typically a shared IRQ handler of an IOMMU driver, resulting in a potential global DOS. Instead of reusing the mutex to protect the fault->deliver list, add a separate spinlock, nested under the mutex, to do the job. iommufd_fault_iopf_handler() would no longer be blocked by copy_from/to_user(). Add a free_list in iommufd_auto_response_faults(), so the spinlock can simply fence a fast list_for_each_entry_safe routine. Provide two deliver list helpers for iommufd_fault_fops_read() to use: - Fetch the first iopf_group out of the fault->deliver list - Restore an iopf_group back to the head of the fault->deliver list Lastly, move the mutex closer to the response in the fault structure, and update its kdoc accordingly. Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object") Link: https://patch.msgid.link/r/20250117192901.79491-1-nicolinc@nvidia.com Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Reviewed-by: Kevin Tian Reviewed-by: Lu Baolu Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 34 ++++++++++++++++--------- drivers/iommu/iommufd/iommufd_private.h | 29 +++++++++++++++++++-- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index 685510224d05..a9160f4443d2 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -103,15 +103,23 @@ static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt, { struct iommufd_fault *fault = hwpt->fault; struct iopf_group *group, *next; + struct list_head free_list; unsigned long index; if (!fault) return; + INIT_LIST_HEAD(&free_list); mutex_lock(&fault->mutex); + spin_lock(&fault->lock); list_for_each_entry_safe(group, next, &fault->deliver, node) { if (group->attach_handle != &handle->handle) continue; + list_move(&group->node, &free_list); + } + spin_unlock(&fault->lock); + + list_for_each_entry_safe(group, next, &free_list, node) { list_del(&group->node); iopf_group_response(group, IOMMU_PAGE_RESP_INVALID); iopf_free_group(group); @@ -266,17 +274,19 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, return -ESPIPE; mutex_lock(&fault->mutex); - while (!list_empty(&fault->deliver) && count > done) { - group = list_first_entry(&fault->deliver, - struct iopf_group, node); - - if (group->fault_count * fault_size > count - done) + while ((group = iommufd_fault_deliver_fetch(fault))) { + if (done >= count || + group->fault_count * fault_size > count - done) { + iommufd_fault_deliver_restore(fault, group); break; + } rc = xa_alloc(&fault->response, &group->cookie, group, xa_limit_32b, GFP_KERNEL); - if (rc) + if (rc) { + iommufd_fault_deliver_restore(fault, group); break; + } idev = to_iommufd_handle(group->attach_handle)->idev; list_for_each_entry(iopf, &group->faults, list) { @@ -285,13 +295,12 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, group->cookie); if (copy_to_user(buf + done, &data, fault_size)) { xa_erase(&fault->response, group->cookie); + iommufd_fault_deliver_restore(fault, group); rc = -EFAULT; break; } done += fault_size; } - - list_del(&group->node); } mutex_unlock(&fault->mutex); @@ -349,10 +358,10 @@ static __poll_t iommufd_fault_fops_poll(struct file *filep, __poll_t pollflags = EPOLLOUT; poll_wait(filep, &fault->wait_queue, wait); - mutex_lock(&fault->mutex); + spin_lock(&fault->lock); if (!list_empty(&fault->deliver)) pollflags |= EPOLLIN | EPOLLRDNORM; - mutex_unlock(&fault->mutex); + spin_unlock(&fault->lock); return pollflags; } @@ -394,6 +403,7 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd) INIT_LIST_HEAD(&fault->deliver); xa_init_flags(&fault->response, XA_FLAGS_ALLOC1); mutex_init(&fault->mutex); + spin_lock_init(&fault->lock); init_waitqueue_head(&fault->wait_queue); filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops, @@ -442,9 +452,9 @@ int iommufd_fault_iopf_handler(struct iopf_group *group) hwpt = group->attach_handle->domain->fault_data; fault = hwpt->fault; - mutex_lock(&fault->mutex); + spin_lock(&fault->lock); list_add_tail(&group->node, &fault->deliver); - mutex_unlock(&fault->mutex); + spin_unlock(&fault->lock); wake_up_interruptible(&fault->wait_queue); diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index b6d706cf2c66..0b1bafc7fd99 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -443,14 +443,39 @@ struct iommufd_fault { struct iommufd_ctx *ictx; struct file *filep; - /* The lists of outstanding faults protected by below mutex. */ - struct mutex mutex; + spinlock_t lock; /* protects the deliver list */ struct list_head deliver; + struct mutex mutex; /* serializes response flows */ struct xarray response; struct wait_queue_head wait_queue; }; +/* Fetch the first node out of the fault->deliver list */ +static inline struct iopf_group * +iommufd_fault_deliver_fetch(struct iommufd_fault *fault) +{ + struct list_head *list = &fault->deliver; + struct iopf_group *group = NULL; + + spin_lock(&fault->lock); + if (!list_empty(list)) { + group = list_first_entry(list, struct iopf_group, node); + list_del(&group->node); + } + spin_unlock(&fault->lock); + return group; +} + +/* Restore a node back to the head of the fault->deliver list */ +static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault, + struct iopf_group *group) +{ + spin_lock(&fault->lock); + list_add(&group->node, &fault->deliver); + spin_unlock(&fault->lock); +} + struct iommufd_attach_handle { struct iommu_attach_handle handle; struct iommufd_device *idev; From e721f619e3ec9bae08bf419c3944cf1e6966c821 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 20 Jan 2025 11:50:51 -0800 Subject: [PATCH 8/8] iommufd: Fix struct iommu_hwpt_pgfault init and padding The iommu_hwpt_pgfault is used to report IO page fault data to userspace, but iommufd_fault_fops_read was never zeroing its padding. This leaks the content of the kernel stack memory to userspace. Also, the iommufd uAPI requires explicit padding and use of __aligned_u64 to ensure ABI compatibility's with 32 bit. pahole result, before: struct iommu_hwpt_pgfault { __u32 flags; /* 0 4 */ __u32 dev_id; /* 4 4 */ __u32 pasid; /* 8 4 */ __u32 grpid; /* 12 4 */ __u32 perm; /* 16 4 */ /* XXX 4 bytes hole, try to pack */ __u64 addr; /* 24 8 */ __u32 length; /* 32 4 */ __u32 cookie; /* 36 4 */ /* size: 40, cachelines: 1, members: 8 */ /* sum members: 36, holes: 1, sum holes: 4 */ /* last cacheline: 40 bytes */ }; pahole result, after: struct iommu_hwpt_pgfault { __u32 flags; /* 0 4 */ __u32 dev_id; /* 4 4 */ __u32 pasid; /* 8 4 */ __u32 grpid; /* 12 4 */ __u32 perm; /* 16 4 */ __u32 __reserved; /* 20 4 */ __u64 addr __attribute__((__aligned__(8))); /* 24 8 */ __u32 length; /* 32 4 */ __u32 cookie; /* 36 4 */ /* size: 40, cachelines: 1, members: 9 */ /* forced alignments: 1 */ /* last cacheline: 40 bytes */ } __attribute__((__aligned__(8))); Fixes: c714f15860fc ("iommufd: Add fault and response message definitions") Link: https://patch.msgid.link/r/20250120195051.2450-1-nicolinc@nvidia.com Cc: stable@vger.kernel.org Reviewed-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/fault.c | 2 +- include/uapi/linux/iommufd.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c index a9160f4443d2..d9a937450e55 100644 --- a/drivers/iommu/iommufd/fault.c +++ b/drivers/iommu/iommufd/fault.c @@ -263,7 +263,7 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf, { size_t fault_size = sizeof(struct iommu_hwpt_pgfault); struct iommufd_fault *fault = filep->private_data; - struct iommu_hwpt_pgfault data; + struct iommu_hwpt_pgfault data = {}; struct iommufd_device *idev; struct iopf_group *group; struct iopf_fault *iopf; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 34810f6ae2b5..78747b24bd0f 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -868,6 +868,7 @@ enum iommu_hwpt_pgfault_perm { * @pasid: Process Address Space ID * @grpid: Page Request Group Index * @perm: Combination of enum iommu_hwpt_pgfault_perm + * @__reserved: Must be 0. * @addr: Fault address * @length: a hint of how much data the requestor is expecting to fetch. For * example, if the PRI initiator knows it is going to do a 10MB @@ -883,7 +884,8 @@ struct iommu_hwpt_pgfault { __u32 pasid; __u32 grpid; __u32 perm; - __u64 addr; + __u32 __reserved; + __aligned_u64 addr; __u32 length; __u32 cookie; };