summarylogtreecommitdiffstats
path: root/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch
diff options
context:
space:
mode:
Diffstat (limited to '0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch')
-rw-r--r--0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch128
1 files changed, 128 insertions, 0 deletions
diff --git a/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch b/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch
new file mode 100644
index 000000000000..8e1a8dd0ecd3
--- /dev/null
+++ b/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch
@@ -0,0 +1,128 @@
+From 2dd5ed474115150d8175825bc3b56c6385c3a83b Mon Sep 17 00:00:00 2001
+From: "Denis V. Lunev" <den@openvz.org>
+Date: Thu, 11 Aug 2022 18:20:11 +0300
+Subject: [PATCH 48/73] neigh: fix possible DoS due to net iface start/stop
+ loop
+
+[ Upstream commit 66ba215cb51323e4e55e38fd5f250e0fae0cbc94 ]
+
+Normal processing of an ARP request (usually an Ethernet broadcast
+packet) arriving at the host looks like the following:
+* the packet comes to arp_process() call and is passed through routing
+ procedure
+* the request is put into the queue using pneigh_enqueue() if
+ corresponding ARP record is not local (common case for container
+ records on the host)
+* the request is processed by timer (within 80 jiffies by default) and
+ ARP reply is sent from the same arp_process() using
+ NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED condition (flag is set inside
+ pneigh_enqueue())
+
+And here the problem comes. Linux kernel calls pneigh_queue_purge()
+which destroys the whole queue of ARP requests on ANY network interface
+start/stop event through __neigh_ifdown().
+
+This was not a problem originally, as network interface start/stop
+was accessible only to the host 'root', which could do far more
+destructive things. But the world has changed and Linux containers
+are now available. Here the container 'root' has access to this API
+and must be considered an untrusted user from the point of view of
+the hosting (container's) world.
+
+Thus there is an attack vector against other containers on a node
+when a container's root endlessly starts/stops interfaces. We have
+observed such a situation on a real production node where a docker
+container was doing exactly this, and as a result other containers
+on the node became inaccessible.
+
+The proposed patch does a very simple thing: in pneigh_queue_purge() it
+drops only the packets that belong to the network namespace in which the
+interface state change was detected. This is enough to prevent the problem
+for the whole node while preserving the original semantics of the code.
+
+v2:
+ - do del_timer_sync() if queue is empty after pneigh_queue_purge()
+v3:
+ - rebase to net tree
+
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Daniel Borkmann <daniel@iogearbox.net>
+Cc: David Ahern <dsahern@kernel.org>
+Cc: Yajun Deng <yajun.deng@linux.dev>
+Cc: Roopa Prabhu <roopa@nvidia.com>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: netdev@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Cc: Konstantin Khorenko <khorenko@virtuozzo.com>
+Cc: kernel@openvz.org
+Cc: devel@openvz.org
+Investigated-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
+Signed-off-by: Denis V. Lunev <den@openvz.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/neighbour.c | 25 +++++++++++++++++--------
+ 1 file changed, 17 insertions(+), 8 deletions(-)
+
+diff --git a/net/core/neighbour.c b/net/core/neighbour.c
+index 54625287ee5b..19d99d1eff53 100644
+--- a/net/core/neighbour.c
++++ b/net/core/neighbour.c
+@@ -307,14 +307,23 @@ static int neigh_del_timer(struct neighbour *n)
+ return 0;
+ }
+
+-static void pneigh_queue_purge(struct sk_buff_head *list)
++static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
+ {
++ unsigned long flags;
+ struct sk_buff *skb;
+
+- while ((skb = skb_dequeue(list)) != NULL) {
+- dev_put(skb->dev);
+- kfree_skb(skb);
++ spin_lock_irqsave(&list->lock, flags);
++ skb = skb_peek(list);
++ while (skb != NULL) {
++ struct sk_buff *skb_next = skb_peek_next(skb, list);
++ if (net == NULL || net_eq(dev_net(skb->dev), net)) {
++ __skb_unlink(skb, list);
++ dev_put(skb->dev);
++ kfree_skb(skb);
++ }
++ skb = skb_next;
+ }
++ spin_unlock_irqrestore(&list->lock, flags);
+ }
+
+ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
+@@ -385,9 +394,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ write_lock_bh(&tbl->lock);
+ neigh_flush_dev(tbl, dev, skip_perm);
+ pneigh_ifdown_and_unlock(tbl, dev);
+-
+- del_timer_sync(&tbl->proxy_timer);
+- pneigh_queue_purge(&tbl->proxy_queue);
++ pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev));
++ if (skb_queue_empty_lockless(&tbl->proxy_queue))
++ del_timer_sync(&tbl->proxy_timer);
+ return 0;
+ }
+
+@@ -1787,7 +1796,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
+ cancel_delayed_work_sync(&tbl->managed_work);
+ cancel_delayed_work_sync(&tbl->gc_work);
+ del_timer_sync(&tbl->proxy_timer);
+- pneigh_queue_purge(&tbl->proxy_queue);
++ pneigh_queue_purge(&tbl->proxy_queue, NULL);
+ neigh_ifdown(tbl, NULL);
+ if (atomic_read(&tbl->entries))
+ pr_crit("neighbour leakage\n");
+--
+2.37.3
+