From 9a76db03653c9560736d332d31c557c1c03b3804 Mon Sep 17 00:00:00 2001 From: Nathan Taylor Date: Wed, 3 Dec 2014 17:16:55 -0800 Subject: [PATCH] fib_select_multipath() to choose nexthop via naive but lockless round robin --- include/net/ip_fib.h | 6 +-- include/uapi/linux/rtnetlink.h | 11 +++-- net/ipv4/fib_semantics.c | 76 ++++++++++------------------------ 3 files changed, 32 insertions(+), 61 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e49db91593a953..28897725fba901 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -70,11 +70,10 @@ struct fib_nh { struct net_device *nh_dev; struct hlist_node nh_hash; struct fib_info *nh_parent; - unsigned int nh_flags; + unsigned long nh_flags; unsigned char nh_scope; #ifdef CONFIG_IP_ROUTE_MULTIPATH int nh_weight; - int nh_power; #endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; @@ -111,9 +110,6 @@ struct fib_info { #define fib_rtt fib_metrics[RTAX_RTT-1] #define fib_advmss fib_metrics[RTAX_ADVMSS-1] int fib_nhs; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int fib_power; -#endif struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 7a2144e1afae67..a10e7e44ee6885 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -323,9 +323,14 @@ struct rtnexthop { /* rtnh_flags */ -#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ -#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ -#define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_DEAD_OFFSET 0 +#define RTNH_F_DEAD (1 << RTNH_F_DEAD_OFFSET) /* Nexthop is dead (used by multipath) */ + +#define RTNH_F_PERVASIVE_OFFSET 1 +#define RTNH_F_PERVASIVE (1 << RTNH_F_PERVASIVE_OFFSET) /* Do recursive gateway lookup */ + +#define RTNH_F_ONLINK_OFFSET 2 +#define RTNH_F_ONLINK (1 << RTNH_F_ONLINK_OFFSET) /* Gateway is forced on link */ /* Macros to handle hexthops */ diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index bc773a10dca63b..1f5773c43a7fe5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -57,7 +58,7 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; #ifdef CONFIG_IP_ROUTE_MULTIPATH -static DEFINE_SPINLOCK(fib_multipath_lock); +DEFINE_PER_CPU(int, fib_multipath_counter) = -1; #define for_nexthops(fi) { \ int nhsel; const struct fib_nh *nh; \ @@ -258,9 +259,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) if (nh->nh_oif != onh->nh_oif || nh->nh_gw != onh->nh_gw || nh->nh_scope != onh->nh_scope || -#ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight != onh->nh_weight || -#endif #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif @@ -1137,12 +1136,6 @@ int fib_sync_down_dev(struct net_device *dev, int force) else if (nexthop_nh->nh_dev == dev && nexthop_nh->nh_scope != scope) { nexthop_nh->nh_flags |= RTNH_F_DEAD; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - spin_lock_bh(&fib_multipath_lock); - fi->fib_power -= nexthop_nh->nh_power; - nexthop_nh->nh_power = 0; - spin_unlock_bh(&fib_multipath_lock); -#endif dead++; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1261,10 +1254,7 @@ int fib_sync_up(struct net_device *dev) !__in_dev_get_rtnl(dev)) continue; alive++; - spin_lock_bh(&fib_multipath_lock); - nexthop_nh->nh_power = 0; - nexthop_nh->nh_flags &= ~RTNH_F_DEAD; - spin_unlock_bh(&fib_multipath_lock); + clear_bit(RTNH_F_DEAD_OFFSET, &nexthop_nh->nh_flags); } endfor_nexthops(fi) if (alive > 0) { @@ -1277,55 +1267,35 @@ int fib_sync_up(struct net_device *dev) } /* - * The algorithm is suboptimal, but it provides really - * fair weighted route distribution. + * The algorithm is suboptimal, and it doesn't provide really + * fair weighted route distribution, but it's lock-free! */ void fib_select_multipath(struct fib_result *res) { struct fib_info *fi = res->fi; - int w; + struct fib_nh *nexthop_nh; + int nhsel, attempts; - spin_lock_bh(&fib_multipath_lock); - if (fi->fib_power <= 0) { - int power = 0; - change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { - power += nexthop_nh->nh_weight; - nexthop_nh->nh_power = nexthop_nh->nh_weight; - } - } endfor_nexthops(fi); - fi->fib_power = power; - if (power <= 0) { - spin_unlock_bh(&fib_multipath_lock); - /* Race condition: route has just become dead. */ - res->nh_sel = 0; - return; - } + /* Initialise the counter with the CPU ID if not already set. */ + if (this_cpu_read(fib_multipath_counter) == -1) { + this_cpu_write(fib_multipath_counter, get_cpu()); } + /* Round-robin multipaths, choosing the next live one or + * an arbitrary dead one if none are live. */ + attempts = fi->fib_nhs; + nhsel = 0; + while (--attempts >= 0) { + this_cpu_inc(fib_multipath_counter); + nhsel = this_cpu_read(fib_multipath_counter) % fi->fib_nhs; - /* w should be random number [0..fi->fib_power-1], - * it is pretty bad approximation. - */ - - w = jiffies % fi->fib_power; - - change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && - nexthop_nh->nh_power) { - w -= nexthop_nh->nh_power; - if (w <= 0) { - nexthop_nh->nh_power--; - fi->fib_power--; - res->nh_sel = nhsel; - spin_unlock_bh(&fib_multipath_lock); - return; - } + nexthop_nh = &fi->fib_nh[nhsel]; + if (!nexthop_nh->nh_flags & RTNH_F_DEAD) { + break; } - } endfor_nexthops(fi); + } - /* Race condition: route has just become dead. */ - res->nh_sel = 0; - spin_unlock_bh(&fib_multipath_lock); + /* Race condition: route may have just become dead. */ + res->nh_sel = nhsel; } #endif