ipv6: fix src addr routing with the exception table
When inserting a route cache entry into the exception table, the key is
generated from both src_addr and dest_addr when src addr routing is in use.
However, current logic always assumes the src_addr used to generate the
key is a /128 host address. This is not true in the following scenarios:
1. When the route is a gateway route or does not have a next hop.
(rt6_is_gw_or_nonexthop() == true)
2. When calling ip6_rt_cache_alloc(), saddr is passed in as NULL.
This means, when looking for a route cache in the exception table, we
have to do the lookup twice: first time with the passed in /128 host
address, second time with the src_addr stored in fib6_info.
This solves the pmtu discovery issue reported by Mikael Magnusson where
a route cache with a lower mtu info is created for a gateway route with
src addr. Before this fix, the lookup code was not able to find this route cache.
Fixes: 2b760fcf5c ("ipv6: hook up exception table to store dst cache")
Reported-by: Mikael Magnusson <mikael.kernel@lists.m7n.se>
Bisected-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
committed by
David S. Miller
parent
9a6c8bf91b
commit
510e2ceda0
+27
-24
@@ -111,8 +111,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
|
|||||||
int iif, int type, u32 portid, u32 seq,
|
int iif, int type, u32 portid, u32 seq,
|
||||||
unsigned int flags);
|
unsigned int flags);
|
||||||
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
|
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
|
||||||
struct in6_addr *daddr,
|
const struct in6_addr *daddr,
|
||||||
struct in6_addr *saddr);
|
const struct in6_addr *saddr);
|
||||||
|
|
||||||
#ifdef CONFIG_IPV6_ROUTE_INFO
|
#ifdef CONFIG_IPV6_ROUTE_INFO
|
||||||
static struct fib6_info *rt6_add_route_info(struct net *net,
|
static struct fib6_info *rt6_add_route_info(struct net *net,
|
||||||
@@ -1573,31 +1573,44 @@ out:
|
|||||||
* Caller has to hold rcu_read_lock()
|
* Caller has to hold rcu_read_lock()
|
||||||
*/
|
*/
|
||||||
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
|
static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
|
||||||
struct in6_addr *daddr,
|
const struct in6_addr *daddr,
|
||||||
struct in6_addr *saddr)
|
const struct in6_addr *saddr)
|
||||||
{
|
{
|
||||||
|
const struct in6_addr *src_key = NULL;
|
||||||
struct rt6_exception_bucket *bucket;
|
struct rt6_exception_bucket *bucket;
|
||||||
struct in6_addr *src_key = NULL;
|
|
||||||
struct rt6_exception *rt6_ex;
|
struct rt6_exception *rt6_ex;
|
||||||
struct rt6_info *ret = NULL;
|
struct rt6_info *ret = NULL;
|
||||||
|
|
||||||
bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
|
|
||||||
|
|
||||||
#ifdef CONFIG_IPV6_SUBTREES
|
#ifdef CONFIG_IPV6_SUBTREES
|
||||||
/* fib6i_src.plen != 0 indicates f6i is in subtree
|
/* fib6i_src.plen != 0 indicates f6i is in subtree
|
||||||
* and exception table is indexed by a hash of
|
* and exception table is indexed by a hash of
|
||||||
* both fib6_dst and fib6_src.
|
* both fib6_dst and fib6_src.
|
||||||
* Otherwise, the exception table is indexed by
|
* However, the src addr used to create the hash
|
||||||
* a hash of only fib6_dst.
|
* might not be exactly the passed in saddr which
|
||||||
|
* is a /128 addr from the flow.
|
||||||
|
* So we need to use f6i->fib6_src to redo lookup
|
||||||
|
* if the passed in saddr does not find anything.
|
||||||
|
* (See the logic in ip6_rt_cache_alloc() on how
|
||||||
|
* rt->rt6i_src is updated.)
|
||||||
*/
|
*/
|
||||||
if (res->f6i->fib6_src.plen)
|
if (res->f6i->fib6_src.plen)
|
||||||
src_key = saddr;
|
src_key = saddr;
|
||||||
|
find_ex:
|
||||||
#endif
|
#endif
|
||||||
|
bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
|
||||||
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
|
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
|
||||||
|
|
||||||
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
|
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
|
||||||
ret = rt6_ex->rt6i;
|
ret = rt6_ex->rt6i;
|
||||||
|
|
||||||
|
#ifdef CONFIG_IPV6_SUBTREES
|
||||||
|
/* Use fib6_src as src_key and redo lookup */
|
||||||
|
if (!ret && src_key && src_key != &res->f6i->fib6_src.addr) {
|
||||||
|
src_key = &res->f6i->fib6_src.addr;
|
||||||
|
goto find_ex;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2672,12 +2685,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
|
|||||||
const struct in6_addr *daddr,
|
const struct in6_addr *daddr,
|
||||||
const struct in6_addr *saddr)
|
const struct in6_addr *saddr)
|
||||||
{
|
{
|
||||||
struct rt6_exception_bucket *bucket;
|
|
||||||
const struct fib6_nh *nh = res->nh;
|
const struct fib6_nh *nh = res->nh;
|
||||||
struct fib6_info *f6i = res->f6i;
|
struct fib6_info *f6i = res->f6i;
|
||||||
const struct in6_addr *src_key;
|
|
||||||
struct rt6_exception *rt6_ex;
|
|
||||||
struct inet6_dev *idev;
|
struct inet6_dev *idev;
|
||||||
|
struct rt6_info *rt;
|
||||||
u32 mtu = 0;
|
u32 mtu = 0;
|
||||||
|
|
||||||
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
|
if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
|
||||||
@@ -2686,18 +2697,10 @@ u32 ip6_mtu_from_fib6(const struct fib6_result *res,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
src_key = NULL;
|
rt = rt6_find_cached_rt(res, daddr, saddr);
|
||||||
#ifdef CONFIG_IPV6_SUBTREES
|
if (unlikely(rt)) {
|
||||||
if (f6i->fib6_src.plen)
|
mtu = dst_metric_raw(&rt->dst, RTAX_MTU);
|
||||||
src_key = saddr;
|
} else {
|
||||||
#endif
|
|
||||||
|
|
||||||
bucket = rcu_dereference(f6i->rt6i_exception_bucket);
|
|
||||||
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
|
|
||||||
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
|
|
||||||
mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
|
|
||||||
|
|
||||||
if (likely(!mtu)) {
|
|
||||||
struct net_device *dev = nh->fib_nh_dev;
|
struct net_device *dev = nh->fib_nh_dev;
|
||||||
|
|
||||||
mtu = IPV6_MIN_MTU;
|
mtu = IPV6_MIN_MTU;
|
||||||
|
|||||||
Reference in New Issue
Block a user