summary | log | tree | commit | diff | stats
path: root/0103-Bluetooth_fix_deadlock_for_RFCOMM_sk_state_change.patch
diff options
context:
space:
mode:
Diffstat (limited to '0103-Bluetooth_fix_deadlock_for_RFCOMM_sk_state_change.patch')
-rw-r--r-- 0103-Bluetooth_fix_deadlock_for_RFCOMM_sk_state_change.patch 229
1 file changed, 229 insertions, 0 deletions
diff --git a/0103-Bluetooth_fix_deadlock_for_RFCOMM_sk_state_change.patch b/0103-Bluetooth_fix_deadlock_for_RFCOMM_sk_state_change.patch
new file mode 100644
index 000000000000..b5087316dab2
--- /dev/null
+++ b/0103-Bluetooth_fix_deadlock_for_RFCOMM_sk_state_change.patch
@@ -0,0 +1,229 @@
+
+From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
+From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
+Date: Mon, 4 Oct 2021 14:07:34 -0400
+Subject: [PATCH] Bluetooth: fix deadlock for RFCOMM sk state change
+
+Syzbot reports the following task hang [1]:
+
+INFO: task syz-executor255:8499 blocked for more than 143 seconds.
+ Not tainted 5.14.0-rc7-syzkaller #0
+
+Call Trace:
+ context_switch kernel/sched/core.c:4681 [inline]
+ __schedule+0x93a/0x26f0 kernel/sched/core.c:5938
+ schedule+0xd3/0x270 kernel/sched/core.c:6017
+ __lock_sock+0x13d/0x260 net/core/sock.c:2644
+ lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
+ lock_sock include/net/sock.h:1612 [inline]
+ rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
+ __rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
+ rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
+ __rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
+ rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
+ rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
+ __sock_release+0xcd/0x280 net/socket.c:649
+ sock_close+0x18/0x20 net/socket.c:1314
+ __fput+0x288/0x920 fs/file_table.c:280
+ task_work_run+0xdd/0x1a0 kernel/task_work.c:164
+ exit_task_work include/linux/task_work.h:32 [inline]
+ do_exit+0xbd4/0x2a60 kernel/exit.c:825
+ do_group_exit+0x125/0x310 kernel/exit.c:922
+ get_signal+0x47f/0x2160 kernel/signal.c:2808
+ arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
+ handle_signal_work kernel/entry/common.c:148 [inline]
+ exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
+ exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
+ __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
+ syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
+ do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+Showing all locks held in the system:
+1 lock held by khungtaskd/1653:
+ #0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
+ debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
+1 lock held by krfcommd/4781:
+ #0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
+ rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
+ #0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
+ rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
+2 locks held by in:imklog/8206:
+ #0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
+ __fdget_pos+0xe9/0x100 fs/file.c:974
+ #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
+ raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
+ #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
+ kernel/sched/sched.h:1307 [inline]
+ #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
+ kernel/sched/sched.h:1610 [inline]
+ #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
+ __schedule+0x233/0x26f0 kernel/sched/core.c:5852
+4 locks held by syz-executor255/8499:
+ #0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
+ inode_lock include/linux/fs.h:774 [inline]
+ #0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
+ __sock_release+0x86/0x280 net/socket.c:648
+ #1:
+ ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
+ at: lock_sock include/net/sock.h:1612 [inline]
+ #1:
+ ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
+ at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
+ #2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
+ rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
+ #3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
+ __rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
+==================================================================
+
+The task hangs because of a deadlock that occurs when lock_sock() is
+called in rfcomm_sk_state_change(). One such call stack is:
+
+  rfcomm_sock_shutdown():
+    lock_sock();
+    __rfcomm_sock_close():
+      rfcomm_dlc_close():
+        __rfcomm_dlc_close():
+          rfcomm_dlc_lock();
+          rfcomm_sk_state_change():
+            lock_sock();
+
+lock_sock() has to be called when the sk state is changed because the
+lock is not always held when rfcomm_sk_state_change() is
+called. However, besides the recursive deadlock, there is also an
+issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
+lock_sock() if the socket is locked in rfcomm_sk_state_change().
+
+To avoid these issues, we can instead schedule the sk state change in
+the global workqueue. This is already the implicit assumption about
+how sk state changes happen. For example, in rfcomm_sock_shutdown(),
+the call to __rfcomm_sock_close() is followed by
+bt_sock_wait_state().
+
+Additionally, the call to rfcomm_sock_kill() inside
+rfcomm_sk_state_change() should be removed. The socket shouldn't be
+killed here because only rfcomm_sock_release() calls sock_orphan(),
+which it already follows up with a call to rfcomm_sock_kill().
+
+Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
+Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
+Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
+Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
+Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
+Cc: Hillf Danton <hdanton@sina.com>
+---
+ include/net/bluetooth/rfcomm.h | 3 +++
+ net/bluetooth/rfcomm/core.c | 2 ++
+ net/bluetooth/rfcomm/sock.c | 34 ++++++++++++++++++++++------------
+ 3 files changed, 27 insertions(+), 12 deletions(-)
+
+diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
+index 99d26879b02a53..a92799fc5e74d0 100644
+--- a/include/net/bluetooth/rfcomm.h
++++ b/include/net/bluetooth/rfcomm.h
+@@ -171,6 +171,7 @@ struct rfcomm_dlc {
+ struct rfcomm_session *session;
+ struct sk_buff_head tx_queue;
+ struct timer_list timer;
++ struct work_struct state_change_work;
+
+ struct mutex lock;
+ unsigned long state;
+@@ -186,6 +187,7 @@ struct rfcomm_dlc {
+ u8 sec_level;
+ u8 role_switch;
+ u32 defer_setup;
++ int err;
+
+ uint mtu;
+ uint cfc;
+@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
+ u8 role_switch;
+ };
+
++void __rfcomm_sk_state_change(struct work_struct *work);
+ int rfcomm_init_sockets(void);
+ void rfcomm_cleanup_sockets(void);
+
+diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
+index 7324764384b677..c6494e85cd68b2 100644
+--- a/net/bluetooth/rfcomm/core.c
++++ b/net/bluetooth/rfcomm/core.c
+@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
+ d->flags = 0;
+ d->mscex = 0;
+ d->sec_level = BT_SECURITY_LOW;
++ d->err = 0;
+ d->mtu = RFCOMM_DEFAULT_MTU;
+ d->v24_sig = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
+
+@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
+ timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
+
+ skb_queue_head_init(&d->tx_queue);
++ INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
+ mutex_init(&d->lock);
+ refcount_set(&d->refcnt, 1);
+
+diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
+index 4bf4ea6cbb5eee..4850dafbaa05fb 100644
+--- a/net/bluetooth/rfcomm/sock.c
++++ b/net/bluetooth/rfcomm/sock.c
+@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
+ rfcomm_dlc_throttle(d);
+ }
+
+-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
++void __rfcomm_sk_state_change(struct work_struct *work)
+ {
++ struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
++ state_change_work);
+ struct sock *sk = d->owner, *parent;
+
+ if (!sk)
+ return;
+
+- BT_DBG("dlc %p state %ld err %d", d, d->state, err);
+-
+ lock_sock(sk);
++ rfcomm_dlc_lock(d);
+
+- if (err)
+- sk->sk_err = err;
++ BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
++
++ if (d->err)
++ sk->sk_err = d->err;
+
+ sk->sk_state = d->state;
+
+@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+ sk->sk_state_change(sk);
+ }
+
++ rfcomm_dlc_unlock(d);
+ release_sock(sk);
++ sock_put(sk);
++}
+
+- if (parent && sock_flag(sk, SOCK_ZAPPED)) {
+- /* We have to drop DLC lock here, otherwise
+- * rfcomm_sock_destruct() will dead lock. */
+- rfcomm_dlc_unlock(d);
+- rfcomm_sock_kill(sk);
+- rfcomm_dlc_lock(d);
+- }
++static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
++{
++ struct sock *sk = d->owner;
++
++ if (!sk)
++ return;
++
++ d->err = err;
++ sock_hold(sk);
++ if (!schedule_work(&d->state_change_work))
++ sock_put(sk);
+ }
+
+ /* ---- Socket functions ---- */