diff options
Diffstat (limited to '0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch')
-rw-r--r-- | 0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch b/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch new file mode 100644 index 000000000000..8e1a8dd0ecd3 --- /dev/null +++ b/0048-neigh-fix-possible-DoS-due-to-net-iface-start-stop-l.patch @@ -0,0 +1,128 @@ +From 2dd5ed474115150d8175825bc3b56c6385c3a83b Mon Sep 17 00:00:00 2001 +From: "Denis V. Lunev" <den@openvz.org> +Date: Thu, 11 Aug 2022 18:20:11 +0300 +Subject: [PATCH 48/73] neigh: fix possible DoS due to net iface start/stop + loop + +[ Upstream commit 66ba215cb51323e4e55e38fd5f250e0fae0cbc94 ] + +Normal processing of ARP request (usually this is Ethernet broadcast +packet) coming to the host is looking like the following: +* the packet comes to arp_process() call and is passed through routing + procedure +* the request is put into the queue using pneigh_enqueue() if + corresponding ARP record is not local (common case for container + records on the host) +* the request is processed by timer (within 80 jiffies by default) and + ARP reply is sent from the same arp_process() using + NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED condition (flag is set inside + pneigh_enqueue()) + +And here the problem comes. Linux kernel calls pneigh_queue_purge() +which destroys the whole queue of ARP requests on ANY network interface +start/stop event through __neigh_ifdown(). + +This is actually not a problem within the original world as network +interface start/stop was accessible to the host 'root' only, which +could do more destructive things. But the world is changed and there +are Linux containers available. Here container 'root' has an access +to this API and could be considered as untrusted user in the hosting +(container's) world. + +Thus there is an attack vector to other containers on node when +container's root will endlessly start/stop interfaces. We have observed +similar situation on a real production node when docker container was +doing such activity and thus other containers on the node become not +accessible. + +The patch proposed doing very simple thing. It drops only packets from +the same namespace in the pneigh_queue_purge() where network interface +state change is detected. This is enough to prevent the problem for the +whole node preserving original semantics of the code. + +v2: + - do del_timer_sync() if queue is empty after pneigh_queue_purge() +v3: + - rebase to net tree + +Cc: "David S. Miller" <davem@davemloft.net> +Cc: Eric Dumazet <edumazet@google.com> +Cc: Jakub Kicinski <kuba@kernel.org> +Cc: Paolo Abeni <pabeni@redhat.com> +Cc: Daniel Borkmann <daniel@iogearbox.net> +Cc: David Ahern <dsahern@kernel.org> +Cc: Yajun Deng <yajun.deng@linux.dev> +Cc: Roopa Prabhu <roopa@nvidia.com> +Cc: Christian Brauner <brauner@kernel.org> +Cc: netdev@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Cc: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> +Cc: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> +Cc: Konstantin Khorenko <khorenko@virtuozzo.com> +Cc: kernel@openvz.org +Cc: devel@openvz.org +Investigated-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com> +Signed-off-by: Denis V. Lunev <den@openvz.org> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Sasha Levin <sashal@kernel.org> +--- + net/core/neighbour.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +diff --git a/net/core/neighbour.c b/net/core/neighbour.c +index 54625287ee5b..19d99d1eff53 100644 +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -307,14 +307,23 @@ static int neigh_del_timer(struct neighbour *n) + return 0; + } + +-static void pneigh_queue_purge(struct sk_buff_head *list) ++static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) + { ++ unsigned long flags; + struct sk_buff *skb; + +- while ((skb = skb_dequeue(list)) != NULL) { +- dev_put(skb->dev); +- kfree_skb(skb); ++ spin_lock_irqsave(&list->lock, flags); ++ skb = skb_peek(list); ++ while (skb != NULL) { ++ struct sk_buff *skb_next = skb_peek_next(skb, list); ++ if (net == NULL || net_eq(dev_net(skb->dev), net)) { ++ __skb_unlink(skb, list); ++ dev_put(skb->dev); ++ kfree_skb(skb); ++ } ++ skb = skb_next; + } ++ spin_unlock_irqrestore(&list->lock, flags); + } + + static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev, +@@ -385,9 +394,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev, + write_lock_bh(&tbl->lock); + neigh_flush_dev(tbl, dev, skip_perm); + pneigh_ifdown_and_unlock(tbl, dev); +- +- del_timer_sync(&tbl->proxy_timer); +- pneigh_queue_purge(&tbl->proxy_queue); ++ pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); ++ if (skb_queue_empty_lockless(&tbl->proxy_queue)) ++ del_timer_sync(&tbl->proxy_timer); + return 0; + } + +@@ -1787,7 +1796,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) + cancel_delayed_work_sync(&tbl->managed_work); + cancel_delayed_work_sync(&tbl->gc_work); + del_timer_sync(&tbl->proxy_timer); +- pneigh_queue_purge(&tbl->proxy_queue); ++ pneigh_queue_purge(&tbl->proxy_queue, NULL); + neigh_ifdown(tbl, NULL); + if (atomic_read(&tbl->entries)) + pr_crit("neighbour leakage\n"); +-- +2.37.3 + |