summarylogtreecommitdiffstats
path: root/0003-Bluetooth-fix-deadlock-for-RFCOMM-sk-state-change.patch
blob: cfaeb7a44543e26c7e82c24d4de224c0ca961bc1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
From 430daaab3c78de6bd82f10cfb5a0f016c6e583f6 Mon Sep 17 00:00:00 2001
From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Date: Mon, 4 Oct 2021 14:07:34 -0400
Subject: [PATCH 03/13] Bluetooth: fix deadlock for RFCOMM sk state change

Syzbot reports the following task hang [1]:

INFO: task syz-executor255:8499 blocked for more than 143 seconds.
      Not tainted 5.14.0-rc7-syzkaller #0

Call Trace:
 context_switch kernel/sched/core.c:4681 [inline]
 __schedule+0x93a/0x26f0 kernel/sched/core.c:5938
 schedule+0xd3/0x270 kernel/sched/core.c:6017
 __lock_sock+0x13d/0x260 net/core/sock.c:2644
 lock_sock_nested+0xf6/0x120 net/core/sock.c:3185
 lock_sock include/net/sock.h:1612 [inline]
 rfcomm_sk_state_change+0xb4/0x390 net/bluetooth/rfcomm/sock.c:73
 __rfcomm_dlc_close+0x1b6/0x8a0 net/bluetooth/rfcomm/core.c:489
 rfcomm_dlc_close+0x1ea/0x240 net/bluetooth/rfcomm/core.c:520
 __rfcomm_sock_close+0xac/0x260 net/bluetooth/rfcomm/sock.c:220
 rfcomm_sock_shutdown+0xe9/0x210 net/bluetooth/rfcomm/sock.c:931
 rfcomm_sock_release+0x5f/0x140 net/bluetooth/rfcomm/sock.c:951
 __sock_release+0xcd/0x280 net/socket.c:649
 sock_close+0x18/0x20 net/socket.c:1314
 __fput+0x288/0x920 fs/file_table.c:280
 task_work_run+0xdd/0x1a0 kernel/task_work.c:164
 exit_task_work include/linux/task_work.h:32 [inline]
 do_exit+0xbd4/0x2a60 kernel/exit.c:825
 do_group_exit+0x125/0x310 kernel/exit.c:922
 get_signal+0x47f/0x2160 kernel/signal.c:2808
 arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865
 handle_signal_work kernel/entry/common.c:148 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:172 [inline]
 exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209
 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline]
 syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302
 do_syscall_64+0x42/0xb0 arch/x86/entry/common.c:86
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Showing all locks held in the system:
1 lock held by khungtaskd/1653:
 #0: ffffffff8b97c280 (rcu_read_lock){....}-{1:2}, at:
 debug_show_all_locks+0x53/0x260 kernel/locking/lockdep.c:6446
1 lock held by krfcommd/4781:
 #0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
 rfcomm_process_sessions net/bluetooth/rfcomm/core.c:1979 [inline]
 #0: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
 rfcomm_run+0x2ed/0x4a20 net/bluetooth/rfcomm/core.c:2086
2 locks held by in:imklog/8206:
 #0: ffff8880182ce5f0 (&f->f_pos_lock){+.+.}-{3:3}, at:
 __fdget_pos+0xe9/0x100 fs/file.c:974
 #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
 raw_spin_rq_lock_nested kernel/sched/core.c:460 [inline]
 #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock
 kernel/sched/sched.h:1307 [inline]
 #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at: rq_lock
 kernel/sched/sched.h:1610 [inline]
 #1: ffff8880b9c51a58 (&rq->__lock){-.-.}-{2:2}, at:
 __schedule+0x233/0x26f0 kernel/sched/core.c:5852
4 locks held by syz-executor255/8499:
 #0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
 inode_lock include/linux/fs.h:774 [inline]
 #0: ffff888039a83690 (&sb->s_type->i_mutex_key#13){+.+.}-{3:3}, at:
 __sock_release+0x86/0x280 net/socket.c:648
 #1:
 ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
 at: lock_sock include/net/sock.h:1612 [inline]
 #1:
 ffff88802fa31120 (sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM){+.+.}-{0:0},
 at: rfcomm_sock_shutdown+0x54/0x210 net/bluetooth/rfcomm/sock.c:928
 #2: ffffffff8d306528 (rfcomm_mutex){+.+.}-{3:3}, at:
 rfcomm_dlc_close+0x34/0x240 net/bluetooth/rfcomm/core.c:507
 #3: ffff888141bd6d28 (&d->lock){+.+.}-{3:3}, at:
 __rfcomm_dlc_close+0x162/0x8a0 net/bluetooth/rfcomm/core.c:487
==================================================================

The task hangs because of a deadlock that occurs when lock_sock() is
called in rfcomm_sk_state_change(). One such call stack is:

  rfcomm_sock_shutdown():
    lock_sock();
    __rfcomm_sock_close():
      rfcomm_dlc_close():
        __rfcomm_dlc_close():
          rfcomm_dlc_lock();
          rfcomm_sk_state_change():
            lock_sock();

lock_sock() has to be called when the sk state is changed because the
lock is not always held when rfcomm_sk_state_change() is
called. However, besides the recursive deadlock, there is also an
issue of a lock hierarchy inversion between rfcomm_dlc_lock() and
lock_sock() if the socket is locked in rfcomm_sk_state_change().

To avoid these issues, we can instead schedule the sk state change in
the global workqueue. This is already the implicit assumption about
how sk state changes happen. For example, in rfcomm_sock_shutdown(),
the call to __rfcomm_sock_close() is followed by
bt_sock_wait_state().

Additionally, the call to rfcomm_sock_kill() inside
rfcomm_sk_state_change() should be removed. The socket shouldn't be
killed here because only rfcomm_sock_release() calls sock_orphan(),
which it already follows up with a call to rfcomm_sock_kill().

Fixes: b7ce436a5d79 ("Bluetooth: switch to lock_sock in RFCOMM")
Link: https://syzkaller.appspot.com/bug?extid=7d51f807c81b190a127d [1]
Reported-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Tested-by: syzbot+7d51f807c81b190a127d@syzkaller.appspotmail.com
Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Cc: Hillf Danton <hdanton@sina.com>
---
 include/net/bluetooth/rfcomm.h |  3 +++
 net/bluetooth/rfcomm/core.c    |  2 ++
 net/bluetooth/rfcomm/sock.c    | 34 ++++++++++++++++++++++------------
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h
index 99d26879b02a..a92799fc5e74 100644
--- a/include/net/bluetooth/rfcomm.h
+++ b/include/net/bluetooth/rfcomm.h
@@ -171,6 +171,7 @@ struct rfcomm_dlc {
 	struct rfcomm_session *session;
 	struct sk_buff_head   tx_queue;
 	struct timer_list     timer;
+	struct work_struct    state_change_work;
 
 	struct mutex  lock;
 	unsigned long state;
@@ -186,6 +187,7 @@ struct rfcomm_dlc {
 	u8            sec_level;
 	u8            role_switch;
 	u32           defer_setup;
+	int           err;
 
 	uint          mtu;
 	uint          cfc;
@@ -310,6 +312,7 @@ struct rfcomm_pinfo {
 	u8     role_switch;
 };
 
+void __rfcomm_sk_state_change(struct work_struct *work);
 int  rfcomm_init_sockets(void);
 void rfcomm_cleanup_sockets(void);
 
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7324764384b6..c6494e85cd68 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -289,6 +289,7 @@ static void rfcomm_dlc_clear_state(struct rfcomm_dlc *d)
 	d->flags      = 0;
 	d->mscex      = 0;
 	d->sec_level  = BT_SECURITY_LOW;
+	d->err        = 0;
 	d->mtu        = RFCOMM_DEFAULT_MTU;
 	d->v24_sig    = RFCOMM_V24_RTC | RFCOMM_V24_RTR | RFCOMM_V24_DV;
 
@@ -306,6 +307,7 @@ struct rfcomm_dlc *rfcomm_dlc_alloc(gfp_t prio)
 	timer_setup(&d->timer, rfcomm_dlc_timeout, 0);
 
 	skb_queue_head_init(&d->tx_queue);
+	INIT_WORK(&d->state_change_work, __rfcomm_sk_state_change);
 	mutex_init(&d->lock);
 	refcount_set(&d->refcnt, 1);
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 4bf4ea6cbb5e..4850dafbaa05 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -61,19 +61,22 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
 		rfcomm_dlc_throttle(d);
 }
 
-static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+void __rfcomm_sk_state_change(struct work_struct *work)
 {
+	struct rfcomm_dlc *d = container_of(work, struct rfcomm_dlc,
+					    state_change_work);
 	struct sock *sk = d->owner, *parent;
 
 	if (!sk)
 		return;
 
-	BT_DBG("dlc %p state %ld err %d", d, d->state, err);
-
 	lock_sock(sk);
+	rfcomm_dlc_lock(d);
 
-	if (err)
-		sk->sk_err = err;
+	BT_DBG("dlc %p state %ld err %d", d, d->state, d->err);
+
+	if (d->err)
+		sk->sk_err = d->err;
 
 	sk->sk_state = d->state;
 
@@ -91,15 +94,22 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
 		sk->sk_state_change(sk);
 	}
 
+	rfcomm_dlc_unlock(d);
 	release_sock(sk);
+	sock_put(sk);
+}
 
-	if (parent && sock_flag(sk, SOCK_ZAPPED)) {
-		/* We have to drop DLC lock here, otherwise
-		 * rfcomm_sock_destruct() will dead lock. */
-		rfcomm_dlc_unlock(d);
-		rfcomm_sock_kill(sk);
-		rfcomm_dlc_lock(d);
-	}
+static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
+{
+	struct sock *sk = d->owner;
+
+	if (!sk)
+		return;
+
+	d->err = err;
+	sock_hold(sk);
+	if (!schedule_work(&d->state_change_work))
+		sock_put(sk);
 }
 
 /* ---- Socket functions ---- */
-- 
2.38.0