ceph: avoid iput_final() while holding mutex or in dispatch thread
iput_final() may wait for reahahead pages. The wait can cause deadlock.
For example:
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
schedule+0x36/0x80
io_schedule+0x16/0x40
__lock_page+0x101/0x140
truncate_inode_pages_range+0x556/0x9f0
truncate_inode_pages_final+0x4d/0x60
evict+0x182/0x1a0
iput+0x1d2/0x220
iterate_session_caps+0x82/0x230 [ceph]
dispatch+0x678/0xa80 [ceph]
ceph_con_workfn+0x95b/0x1560 [libceph]
process_one_work+0x14d/0x410
worker_thread+0x4b/0x460
kthread+0x105/0x140
ret_from_fork+0x22/0x40
Workqueue: ceph-msgr ceph_con_workfn [libceph]
Call Trace:
__schedule+0x3d6/0x8b0
schedule+0x36/0x80
schedule_preempt_disabled+0xe/0x10
mutex_lock+0x2f/0x40
ceph_check_caps+0x505/0xa80 [ceph]
ceph_put_wrbuffer_cap_refs+0x1e5/0x2c0 [ceph]
writepages_finish+0x2d3/0x410 [ceph]
__complete_request+0x26/0x60 [libceph]
handle_reply+0x6c8/0xa10 [libceph]
dispatch+0x29a/0xbb0 [libceph]
ceph_con_workfn+0x95b/0x1560 [libceph]
process_one_work+0x14d/0x410
worker_thread+0x4b/0x460
kthread+0x105/0x140
ret_from_fork+0x22/0x40
In above example, truncate_inode_pages_range() waits for readahead pages
while holding s_mutex. ceph_check_caps() waits for s_mutex and blocks
OSD dispatch thread. Later OSD replies (for readahead) can't be handled.
ceph_check_caps() also may lock snap_rwsem for read. So similar deadlock
can happen if iput_final() is called while holding snap_rwsem.
In general, it's not good to call iput_final() inside MDS/OSD dispatch
threads or while holding any mutex.
The fix is introducing ceph_async_iput(), which calls iput_final() in
workqueue.
Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
+6
-3
@@ -74,7 +74,8 @@ void ceph_handle_quota(struct ceph_mds_client *mdsc,
|
||||
le64_to_cpu(h->max_files));
|
||||
spin_unlock(&ci->i_ceph_lock);
|
||||
|
||||
iput(inode);
|
||||
/* avoid calling iput_final() in dispatch thread */
|
||||
ceph_async_iput(inode);
|
||||
}
|
||||
|
||||
static struct ceph_quotarealm_inode *
|
||||
@@ -235,7 +236,8 @@ restart:
|
||||
|
||||
ci = ceph_inode(in);
|
||||
has_quota = __ceph_has_any_quota(ci);
|
||||
iput(in);
|
||||
/* avoid calling iput_final() while holding mdsc->snap_rwsem */
|
||||
ceph_async_iput(in);
|
||||
|
||||
next = realm->parent;
|
||||
if (has_quota || !next)
|
||||
@@ -372,7 +374,8 @@ restart:
|
||||
pr_warn("Invalid quota check op (%d)\n", op);
|
||||
exceeded = true; /* Just break the loop */
|
||||
}
|
||||
iput(in);
|
||||
/* avoid calling iput_final() while holding mdsc->snap_rwsem */
|
||||
ceph_async_iput(in);
|
||||
|
||||
next = realm->parent;
|
||||
if (exceeded || !next)
|
||||
|
||||
Reference in New Issue
Block a user