mirror of
https://github.com/openwrt/routing.git
synced 2025-12-21 21:24:32 +04:00
Merge pull request #1104 from ecsv/batadv-for-24.10
openwrt-24.10: batman-adv: Merge bugfixes from 2025.0
This commit is contained in:
@@ -4,7 +4,7 @@ include $(TOPDIR)/rules.mk
|
||||
|
||||
PKG_NAME:=batman-adv
|
||||
PKG_VERSION:=2024.3
|
||||
PKG_RELEASE:=3
|
||||
PKG_RELEASE:=4
|
||||
|
||||
PKG_SOURCE:=$(PKG_NAME)-$(PKG_VERSION).tar.gz
|
||||
PKG_SOURCE_URL:=https://downloads.open-mesh.org/batman/releases/batman-adv-$(PKG_VERSION)
|
||||
|
||||
@@ -0,0 +1,71 @@
|
||||
From: Andy Strohman <andrew@andrewstrohman.com>
|
||||
Date: Thu, 9 Jan 2025 02:27:56 +0000
|
||||
Subject: batman-adv: fix panic during interface removal
|
||||
|
||||
Reference counting is used to ensure that
|
||||
batadv_hardif_neigh_node and batadv_hard_iface
|
||||
are not freed before/during
|
||||
batadv_v_elp_throughput_metric_update work is
|
||||
finished.
|
||||
|
||||
But there isn't a guarantee that the hard if will
|
||||
remain associated with a soft interface up until
|
||||
the work is finished.
|
||||
|
||||
This fixes a crash triggered by reboot that looks
|
||||
like this:
|
||||
|
||||
Call trace:
|
||||
batadv_v_mesh_free+0xd0/0x4dc [batman_adv]
|
||||
batadv_v_elp_throughput_metric_update+0x1c/0xa4
|
||||
process_one_work+0x178/0x398
|
||||
worker_thread+0x2e8/0x4d0
|
||||
kthread+0xd8/0xdc
|
||||
ret_from_fork+0x10/0x20
|
||||
|
||||
(the batadv_v_mesh_free call is misleading,
|
||||
and does not actually happen)
|
||||
|
||||
I was able to make the issue happen more reliably
|
||||
by changing hardif_neigh->bat_v.metric_work work
|
||||
to be delayed work. This allowed me to track down
|
||||
and confirm the fix.
|
||||
|
||||
Fixes: 5c3245172c01 ("batman-adv: ELP - compute the metric based on the estimated throughput")
|
||||
Signed-off-by: Andy Strohman <andrew@andrewstrohman.com>
|
||||
[sven@narfation.org: prevent entering batadv_v_elp_get_throughput without
|
||||
soft_iface]
|
||||
Signed-off-by: Sven Eckelmann <sven@narfation.org>
|
||||
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/1d7c8ac629678b8cd23ef9def7c7b111cb9e8ed5
|
||||
|
||||
--- a/net/batman-adv/bat_v_elp.c
|
||||
+++ b/net/batman-adv/bat_v_elp.c
|
||||
@@ -66,12 +66,19 @@ static void batadv_v_elp_start_timer(str
|
||||
static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
|
||||
{
|
||||
struct batadv_hard_iface *hard_iface = neigh->if_incoming;
|
||||
+ struct net_device *soft_iface = hard_iface->soft_iface;
|
||||
struct ethtool_link_ksettings link_settings;
|
||||
struct net_device *real_netdev;
|
||||
struct station_info sinfo;
|
||||
u32 throughput;
|
||||
int ret;
|
||||
|
||||
+ /* don't query throughput when no longer associated with any
|
||||
+ * batman-adv interface
|
||||
+ */
|
||||
+ if (!soft_iface)
|
||||
+ return BATADV_THROUGHPUT_DEFAULT_VALUE;
|
||||
+
|
||||
/* if the user specified a customised value for this interface, then
|
||||
* return it directly
|
||||
*/
|
||||
@@ -141,7 +148,7 @@ static u32 batadv_v_elp_get_throughput(s
|
||||
|
||||
default_throughput:
|
||||
if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) {
|
||||
- batadv_info(hard_iface->soft_iface,
|
||||
+ batadv_info(soft_iface,
|
||||
"WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n",
|
||||
hard_iface->net_dev->name,
|
||||
BATADV_THROUGHPUT_DEFAULT_VALUE / 10,
|
||||
@@ -0,0 +1,127 @@
|
||||
From: Sven Eckelmann <sven@narfation.org>
|
||||
Date: Wed, 22 Jan 2025 21:51:20 +0100
|
||||
Subject: batman-adv: Ignore neighbor throughput metrics in error case
|
||||
|
||||
If a temporary error happened in the evaluation of the neighbor throughput
|
||||
information, then the invalid throughput result should not be stored in the
|
||||
throughtput EWMA.
|
||||
|
||||
Signed-off-by: Sven Eckelmann <sven@narfation.org>
|
||||
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/ce8b40b7bdcc7f011191acda4c2d3067566eef54
|
||||
|
||||
--- a/net/batman-adv/bat_v_elp.c
|
||||
+++ b/net/batman-adv/bat_v_elp.c
|
||||
@@ -59,11 +59,13 @@ static void batadv_v_elp_start_timer(str
|
||||
/**
|
||||
* batadv_v_elp_get_throughput() - get the throughput towards a neighbour
|
||||
* @neigh: the neighbour for which the throughput has to be obtained
|
||||
+ * @pthroughput: calculated throughput towards the given neighbour in multiples
|
||||
+ * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
|
||||
*
|
||||
- * Return: The throughput towards the given neighbour in multiples of 100kpbs
|
||||
- * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
|
||||
+ * Return: true when value behind @pthroughput was set
|
||||
*/
|
||||
-static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
|
||||
+static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
|
||||
+ u32 *pthroughput)
|
||||
{
|
||||
struct batadv_hard_iface *hard_iface = neigh->if_incoming;
|
||||
struct net_device *soft_iface = hard_iface->soft_iface;
|
||||
@@ -77,14 +79,16 @@ static u32 batadv_v_elp_get_throughput(s
|
||||
* batman-adv interface
|
||||
*/
|
||||
if (!soft_iface)
|
||||
- return BATADV_THROUGHPUT_DEFAULT_VALUE;
|
||||
+ return false;
|
||||
|
||||
/* if the user specified a customised value for this interface, then
|
||||
* return it directly
|
||||
*/
|
||||
throughput = atomic_read(&hard_iface->bat_v.throughput_override);
|
||||
- if (throughput != 0)
|
||||
- return throughput;
|
||||
+ if (throughput != 0) {
|
||||
+ *pthroughput = throughput;
|
||||
+ return true;
|
||||
+ }
|
||||
|
||||
/* if this is a wireless device, then ask its throughput through
|
||||
* cfg80211 API
|
||||
@@ -111,19 +115,24 @@ static u32 batadv_v_elp_get_throughput(s
|
||||
* possible to delete this neighbor. For now set
|
||||
* the throughput metric to 0.
|
||||
*/
|
||||
- return 0;
|
||||
+ *pthroughput = 0;
|
||||
+ return true;
|
||||
}
|
||||
if (ret)
|
||||
goto default_throughput;
|
||||
|
||||
- if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT))
|
||||
- return sinfo.expected_throughput / 100;
|
||||
+ if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) {
|
||||
+ *pthroughput = sinfo.expected_throughput / 100;
|
||||
+ return true;
|
||||
+ }
|
||||
|
||||
/* try to estimate the expected throughput based on reported tx
|
||||
* rates
|
||||
*/
|
||||
- if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
|
||||
- return cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
|
||||
+ if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) {
|
||||
+ *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
|
||||
+ return true;
|
||||
+ }
|
||||
|
||||
goto default_throughput;
|
||||
}
|
||||
@@ -142,8 +151,10 @@ static u32 batadv_v_elp_get_throughput(s
|
||||
hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
|
||||
|
||||
throughput = link_settings.base.speed;
|
||||
- if (throughput && throughput != SPEED_UNKNOWN)
|
||||
- return throughput * 10;
|
||||
+ if (throughput && throughput != SPEED_UNKNOWN) {
|
||||
+ *pthroughput = throughput * 10;
|
||||
+ return true;
|
||||
+ }
|
||||
}
|
||||
|
||||
default_throughput:
|
||||
@@ -157,7 +168,8 @@ default_throughput:
|
||||
}
|
||||
|
||||
/* if none of the above cases apply, return the base_throughput */
|
||||
- return BATADV_THROUGHPUT_DEFAULT_VALUE;
|
||||
+ *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE;
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -169,15 +181,21 @@ void batadv_v_elp_throughput_metric_upda
|
||||
{
|
||||
struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
|
||||
struct batadv_hardif_neigh_node *neigh;
|
||||
+ u32 throughput;
|
||||
+ bool valid;
|
||||
|
||||
neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
|
||||
metric_work);
|
||||
neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
|
||||
bat_v);
|
||||
|
||||
- ewma_throughput_add(&neigh->bat_v.throughput,
|
||||
- batadv_v_elp_get_throughput(neigh));
|
||||
+ valid = batadv_v_elp_get_throughput(neigh, &throughput);
|
||||
+ if (!valid)
|
||||
+ goto put_neigh;
|
||||
+
|
||||
+ ewma_throughput_add(&neigh->bat_v.throughput, throughput);
|
||||
|
||||
+put_neigh:
|
||||
/* decrement refcounter to balance increment performed before scheduling
|
||||
* this task
|
||||
*/
|
||||
@@ -0,0 +1,239 @@
|
||||
From: Sven Eckelmann <sven@narfation.org>
|
||||
Date: Wed, 22 Jan 2025 21:51:21 +0100
|
||||
Subject: batman-adv: Drop unmanaged ELP metric worker
|
||||
|
||||
The ELP worker needs to calculate new metric values for all neighbors
|
||||
"reachable" over an interface. Some of the used metric sources require
|
||||
locks which might need to sleep. This sleep is incompatible with the RCU
|
||||
list iterator used for the recorded neighbors. The initial approach to work
|
||||
around of this problem was to queue another work item per neighbor and then
|
||||
run this in a new context.
|
||||
|
||||
Even when this solved the RCU vs might_sleep() conflict, it has a major
|
||||
problems: Nothing was stopping the work item in case it is not needed
|
||||
anymore - for example because one of the related interfaces was removed or
|
||||
the batman-adv module was unloaded - resulting in potential invalid memory
|
||||
accesses.
|
||||
|
||||
Directly canceling the metric worker also has various problems:
|
||||
|
||||
* cancel_work_sync for a to-be-deactivated interface is called with
|
||||
rtnl_lock held. But the code in the ELP metric worker also tries to use
|
||||
rtnl_lock() - which will never return in this case. This also means that
|
||||
cancel_work_sync would never return because it is waiting for the worker
|
||||
to finish.
|
||||
* iterating over the neighbor list for the to-be-deactivated interface is
|
||||
currently done using the RCU specific methods. Which means that it is
|
||||
possible to miss items when iterating over it without the associated
|
||||
spinlock - a behaviour which is acceptable for a periodic metric check
|
||||
but not for a cleanup routine (which must "stop" all still running
|
||||
workers)
|
||||
|
||||
The better approch is to get rid of the per interface neighbor metric
|
||||
worker and handle everything in the interface worker. The original problems
|
||||
are solved by:
|
||||
|
||||
* creating a list of neighbors which require new metric information inside
|
||||
the RCU protected context, gathering the metric according to the new list
|
||||
outside the RCU protected context
|
||||
* only use rcu_trylock inside metric gathering code to avoid a deadlock
|
||||
when the cancel_delayed_work_sync is called in the interface removal code
|
||||
(which is called with the rtnl_lock held)
|
||||
|
||||
Fixes: 5c3245172c01 ("batman-adv: ELP - compute the metric based on the estimated throughput")
|
||||
Signed-off-by: Sven Eckelmann <sven@narfation.org>
|
||||
Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/405d49a20a205799ec453cadb3d078dc2808d563
|
||||
|
||||
--- a/net/batman-adv/bat_v.c
|
||||
+++ b/net/batman-adv/bat_v.c
|
||||
@@ -113,8 +113,6 @@ static void
|
||||
batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
|
||||
{
|
||||
ewma_throughput_init(&hardif_neigh->bat_v.throughput);
|
||||
- INIT_WORK(&hardif_neigh->bat_v.metric_work,
|
||||
- batadv_v_elp_throughput_metric_update);
|
||||
}
|
||||
|
||||
/**
|
||||
--- a/net/batman-adv/bat_v_elp.c
|
||||
+++ b/net/batman-adv/bat_v_elp.c
|
||||
@@ -18,6 +18,7 @@
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kref.h>
|
||||
+#include <linux/list.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/nl80211.h>
|
||||
@@ -26,6 +27,7 @@
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/skbuff.h>
|
||||
+#include <linux/slab.h>
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/types.h>
|
||||
@@ -42,6 +44,18 @@
|
||||
#include "send.h"
|
||||
|
||||
/**
|
||||
+ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
|
||||
+ * and metric update
|
||||
+ */
|
||||
+struct batadv_v_metric_queue_entry {
|
||||
+ /** @hardif_neigh: hardif neighbor scheduled for metric update */
|
||||
+ struct batadv_hardif_neigh_node *hardif_neigh;
|
||||
+
|
||||
+ /** @list: list node for metric_queue */
|
||||
+ struct list_head list;
|
||||
+};
|
||||
+
|
||||
+/**
|
||||
* batadv_v_elp_start_timer() - restart timer for ELP periodic work
|
||||
* @hard_iface: the interface for which the timer has to be reset
|
||||
*/
|
||||
@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(
|
||||
goto default_throughput;
|
||||
}
|
||||
|
||||
+ /* only use rtnl_trylock because the elp worker will be cancelled while
|
||||
+ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise
|
||||
+ * wait forever when the elp work_item was started and it is then also
|
||||
+ * trying to rtnl_lock
|
||||
+ */
|
||||
+ if (!rtnl_trylock())
|
||||
+ return false;
|
||||
+
|
||||
/* if not a wifi interface, check if this device provides data via
|
||||
* ethtool (e.g. an Ethernet adapter)
|
||||
*/
|
||||
- rtnl_lock();
|
||||
ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
|
||||
rtnl_unlock();
|
||||
if (ret == 0) {
|
||||
@@ -175,31 +196,19 @@ default_throughput:
|
||||
/**
|
||||
* batadv_v_elp_throughput_metric_update() - worker updating the throughput
|
||||
* metric of a single hop neighbour
|
||||
- * @work: the work queue item
|
||||
+ * @neigh: the neighbour to probe
|
||||
*/
|
||||
-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
|
||||
+static void
|
||||
+batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
|
||||
{
|
||||
- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
|
||||
- struct batadv_hardif_neigh_node *neigh;
|
||||
u32 throughput;
|
||||
bool valid;
|
||||
|
||||
- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
|
||||
- metric_work);
|
||||
- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
|
||||
- bat_v);
|
||||
-
|
||||
valid = batadv_v_elp_get_throughput(neigh, &throughput);
|
||||
if (!valid)
|
||||
- goto put_neigh;
|
||||
+ return;
|
||||
|
||||
ewma_throughput_add(&neigh->bat_v.throughput, throughput);
|
||||
-
|
||||
-put_neigh:
|
||||
- /* decrement refcounter to balance increment performed before scheduling
|
||||
- * this task
|
||||
- */
|
||||
- batadv_hardif_neigh_put(neigh);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct bat
|
||||
*/
|
||||
static void batadv_v_elp_periodic_work(struct work_struct *work)
|
||||
{
|
||||
+ struct batadv_v_metric_queue_entry *metric_entry;
|
||||
+ struct batadv_v_metric_queue_entry *metric_safe;
|
||||
struct batadv_hardif_neigh_node *hardif_neigh;
|
||||
struct batadv_hard_iface *hard_iface;
|
||||
struct batadv_hard_iface_bat_v *bat_v;
|
||||
struct batadv_elp_packet *elp_packet;
|
||||
+ struct list_head metric_queue;
|
||||
struct batadv_priv *bat_priv;
|
||||
struct sk_buff *skb;
|
||||
u32 elp_interval;
|
||||
- bool ret;
|
||||
|
||||
bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
|
||||
hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
|
||||
@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(s
|
||||
|
||||
atomic_inc(&hard_iface->bat_v.elp_seqno);
|
||||
|
||||
+ INIT_LIST_HEAD(&metric_queue);
|
||||
+
|
||||
/* The throughput metric is updated on each sent packet. This way, if a
|
||||
* node is dead and no longer sends packets, batman-adv is still able to
|
||||
* react timely to its death.
|
||||
@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(s
|
||||
|
||||
/* Reading the estimated throughput from cfg80211 is a task that
|
||||
* may sleep and that is not allowed in an rcu protected
|
||||
- * context. Therefore schedule a task for that.
|
||||
+ * context. Therefore add it to metric_queue and process it
|
||||
+ * outside rcu protected context.
|
||||
*/
|
||||
- ret = queue_work(batadv_event_workqueue,
|
||||
- &hardif_neigh->bat_v.metric_work);
|
||||
-
|
||||
- if (!ret)
|
||||
+ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
|
||||
+ if (!metric_entry) {
|
||||
batadv_hardif_neigh_put(hardif_neigh);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ metric_entry->hardif_neigh = hardif_neigh;
|
||||
+ list_add(&metric_entry->list, &metric_queue);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
+ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
|
||||
+ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
|
||||
+
|
||||
+ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
|
||||
+ list_del(&metric_entry->list);
|
||||
+ kfree(metric_entry);
|
||||
+ }
|
||||
+
|
||||
restart_timer:
|
||||
batadv_v_elp_start_timer(hard_iface);
|
||||
out:
|
||||
--- a/net/batman-adv/bat_v_elp.h
|
||||
+++ b/net/batman-adv/bat_v_elp.h
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "main.h"
|
||||
|
||||
#include <linux/skbuff.h>
|
||||
-#include <linux/workqueue.h>
|
||||
|
||||
int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
|
||||
void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
|
||||
@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct
|
||||
void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
|
||||
int batadv_v_elp_packet_recv(struct sk_buff *skb,
|
||||
struct batadv_hard_iface *if_incoming);
|
||||
-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
|
||||
|
||||
#endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
|
||||
--- a/net/batman-adv/types.h
|
||||
+++ b/net/batman-adv/types.h
|
||||
@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
|
||||
* neighbor
|
||||
*/
|
||||
unsigned long last_unicast_tx;
|
||||
-
|
||||
- /** @metric_work: work queue callback item for metric update */
|
||||
- struct work_struct metric_work;
|
||||
};
|
||||
|
||||
/**
|
||||
Reference in New Issue
Block a user