Newer
Older
/*
* Linux INET6 implementation
* FIB front-end.
*
* Authors:
*
* $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* Changes:
*
* YOSHIFUJI Hideaki @USAGI
* reworked default router selection.
* - respect outgoing interface
* - select from (probably) reachable routers (i.e.
* routers in REACHABLE, STALE, DELAY or PROBE states).
* - always select the same router if it is (probably)
* reachable. otherwise, round-robin the list.
* Ville Nuorvala
* Fixed routing subtrees.
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netlink.h>
#include <asm/uaccess.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
/* Set to 3 to get tracing. */
#define RT6_DEBUG 2
#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
#define CLONE_OFFLINK_ROUTE 0
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev, int how);
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

YOSHIFUJI Hideaki
committed
#ifdef CONFIG_IPV6_ROUTE_INFO

Daniel Lezcano
committed
static struct rt6_info *rt6_add_route_info(struct net *net,
struct in6_addr *prefix, int prefixlen,

YOSHIFUJI Hideaki
committed
struct in6_addr *gwaddr, int ifindex,
unsigned pref);

Daniel Lezcano
committed
static struct rt6_info *rt6_get_route_info(struct net *net,
struct in6_addr *prefix, int prefixlen,

YOSHIFUJI Hideaki
committed
struct in6_addr *gwaddr, int ifindex);
#endif
static struct dst_ops ip6_dst_ops_template = {
.family = AF_INET6,
.protocol = __constant_htons(ETH_P_IPV6),
.gc = ip6_dst_gc,
.gc_thresh = 1024,
.check = ip6_dst_check,
.destroy = ip6_dst_destroy,
.ifdown = ip6_dst_ifdown,
.negative_advice = ip6_negative_advice,
.link_failure = ip6_link_failure,
.update_pmtu = ip6_rt_update_pmtu,
.entries = ATOMIC_INIT(0),
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
}
static struct dst_ops ip6_dst_blackhole_ops = {
.family = AF_INET6,
.protocol = __constant_htons(ETH_P_IPV6),
.destroy = ip6_dst_destroy,
.check = ip6_dst_check,
.update_pmtu = ip6_rt_blackhole_update_pmtu,
.entry_size = sizeof(struct rt6_info),
.entries = ATOMIC_INIT(0),
static struct rt6_info ip6_null_entry_template = {
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
.obsolete = -1,
.error = -ENETUNREACH,
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_discard,
.output = ip6_pkt_discard_out,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_metric = ~(u32) 0,
.rt6i_ref = ATOMIC_INIT(1),
};
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
struct rt6_info ip6_prohibit_entry_template = {
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
.obsolete = -1,
.error = -EACCES,
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = ip6_pkt_prohibit,
.output = ip6_pkt_prohibit_out,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_metric = ~(u32) 0,
.rt6i_ref = ATOMIC_INIT(1),
};
static struct rt6_info ip6_blk_hole_entry_template = {
.u = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
.__use = 1,
.obsolete = -1,
.error = -EINVAL,
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
.input = dst_discard,
.output = dst_discard,
}
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_metric = ~(u32) 0,
.rt6i_ref = ATOMIC_INIT(1),
};
#endif
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
return (struct rt6_info *)dst_alloc(ops);
}
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
if (idev != NULL) {
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
struct net_device *loopback_dev =
dev->nd_net->loopback_dev;
if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
struct inet6_dev *loopback_idev =
in6_dev_get(loopback_dev);
if (loopback_idev != NULL) {
rt->rt6i_idev = loopback_idev;
in6_dev_put(idev);
}
}
}
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
return (rt->rt6i_flags & RTF_EXPIRES &&
time_after(jiffies, rt->rt6i_expires));
}
static inline int rt6_need_strict(struct in6_addr *daddr)
{
return (ipv6_addr_type(daddr) &
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
}
* Route lookup. Any table->tb6_lock is implied.
static inline struct rt6_info *rt6_device_match(struct net *net,
struct rt6_info *rt,
int oif,
int strict)
{
struct rt6_info *local = NULL;
struct rt6_info *sprt;
if (oif) {
for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
struct net_device *dev = sprt->rt6i_dev;
if (dev->ifindex == oif)
return sprt;
if (dev->flags & IFF_LOOPBACK) {
if (sprt->rt6i_idev == NULL ||
sprt->rt6i_idev->dev->ifindex != oif) {
if (strict && oif)
continue;
local->rt6i_idev->dev->ifindex == oif))
continue;
}
local = sprt;
}
}
if (local)
return local;
if (strict)
return net->ipv6.ip6_null_entry;
#ifdef CONFIG_IPV6_ROUTER_PREF
static void rt6_probe(struct rt6_info *rt)
{
struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
/*
* Okay, this does not seem to be appropriate
* for now, however, we need to check if it
* is really so; aka Router Reachability Probing.
*
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
if (!neigh || (neigh->nud_state & NUD_VALID))
return;
read_lock_bh(&neigh->lock);
if (!(neigh->nud_state & NUD_VALID) &&
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
struct in6_addr mcaddr;
struct in6_addr *target;
neigh->updated = jiffies;
read_unlock_bh(&neigh->lock);
target = (struct in6_addr *)&neigh->primary_key;
addrconf_addr_solict_mult(target, &mcaddr);
ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
} else
read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
return;
}
#endif
* Default Router Selection (RFC 2461 6.3.6)
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
struct net_device *dev = rt->rt6i_dev;
if (!oif || dev->ifindex == oif)
if ((dev->flags & IFF_LOOPBACK) &&
rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
return 1;
return 0;
static inline int rt6_check_neigh(struct rt6_info *rt)
struct neighbour *neigh = rt->rt6i_nexthop;
int m;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
else if (neigh) {
read_lock_bh(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
else if (neigh->nud_state & NUD_FAILED)
m = 0;
#endif
else
m = 1;
read_unlock_bh(&neigh->lock);
} else
m = 0;
static int rt6_score_route(struct rt6_info *rt, int oif,
int strict)
int m, n;
m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
n = rt6_check_neigh(rt);
if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
return -1;
return m;
}
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
int *mpri, struct rt6_info *match)
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
int m;
if (rt6_check_expired(rt))
goto out;
m = rt6_score_route(rt, oif, strict);
if (m < 0)
goto out;
if (m > *mpri) {
if (strict & RT6_LOOKUP_F_REACHABLE)
rt6_probe(match);
*mpri = m;
match = rt;
} else if (strict & RT6_LOOKUP_F_REACHABLE) {
rt6_probe(rt);
}
out:
return match;
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
struct rt6_info *rr_head,
u32 metric, int oif, int strict)
{
struct rt6_info *rt, *match;
int mpri = -1;
match = NULL;
for (rt = rr_head; rt && rt->rt6i_metric == metric;
rt = rt->u.dst.rt6_next)
match = find_match(rt, oif, strict, &mpri, match);
for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
rt = rt->u.dst.rt6_next)
match = find_match(rt, oif, strict, &mpri, match);
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
struct rt6_info *match, *rt0;
struct net *net;
RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
__FUNCTION__, fn->leaf, oif);
rt0 = fn->rr_ptr;
if (!rt0)
fn->rr_ptr = rt0 = fn->leaf;
match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
if (!match &&
(strict & RT6_LOOKUP_F_REACHABLE)) {
struct rt6_info *next = rt0->u.dst.rt6_next;
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
next = fn->leaf;
if (next != rt0)
fn->rr_ptr = next;
RT6_TRACE("%s() => %p\n",
__FUNCTION__, match);
net = rt0->rt6i_dev->nd_net;
return (match ? match : net->ipv6.ip6_null_entry);

YOSHIFUJI Hideaki
committed
#ifdef CONFIG_IPV6_ROUTE_INFO
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
struct in6_addr *gwaddr)
{

Daniel Lezcano
committed
struct net *net = dev->nd_net;

YOSHIFUJI Hideaki
committed
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
struct route_info *rinfo = (struct route_info *) opt;
struct in6_addr prefix_buf, *prefix;
unsigned int pref;
u32 lifetime;
struct rt6_info *rt;
if (len < sizeof(struct route_info)) {
return -EINVAL;
}
/* Sanity check for prefix_len and length */
if (rinfo->length > 3) {
return -EINVAL;
} else if (rinfo->prefix_len > 128) {
return -EINVAL;
} else if (rinfo->prefix_len > 64) {
if (rinfo->length < 2) {
return -EINVAL;
}
} else if (rinfo->prefix_len > 0) {
if (rinfo->length < 1) {
return -EINVAL;
}
}
pref = rinfo->route_pref;
if (pref == ICMPV6_ROUTER_PREF_INVALID)
pref = ICMPV6_ROUTER_PREF_MEDIUM;

YOSHIFUJI Hideaki
committed
if (lifetime == 0xffffffff) {
/* infinity */
} else if (lifetime > 0x7fffffff/HZ) {
/* Avoid arithmetic overflow */
lifetime = 0x7fffffff/HZ - 1;
}
if (rinfo->length == 3)
prefix = (struct in6_addr *)rinfo->prefix;
else {
/* this function is safe */
ipv6_addr_prefix(&prefix_buf,
(struct in6_addr *)rinfo->prefix,
rinfo->prefix_len);
prefix = &prefix_buf;
}

Daniel Lezcano
committed
rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
dev->ifindex);

YOSHIFUJI Hideaki
committed
if (rt && !lifetime) {

YOSHIFUJI Hideaki
committed
rt = NULL;
}
if (!rt && lifetime)

Daniel Lezcano
committed
rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,

YOSHIFUJI Hideaki
committed
pref);
else if (rt)
rt->rt6i_flags = RTF_ROUTEINFO |
(rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
if (rt) {
if (lifetime == 0xffffffff) {
rt->rt6i_flags &= ~RTF_EXPIRES;
} else {
rt->rt6i_expires = jiffies + HZ * lifetime;
rt->rt6i_flags |= RTF_EXPIRES;
}
dst_release(&rt->u.dst);
}
return 0;
}
#endif
#define BACKTRACK(__net, saddr) \
if (rt == __net->ipv6.ip6_null_entry) { \
struct fib6_node *pn; \
if (fn->fn_flags & RTN_TL_ROOT) \
goto out; \
pn = fn->parent; \
if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
else \
fn = pn; \
if (fn->fn_flags & RTN_RTINFO) \
goto restart; \
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
{
struct fib6_node *fn;
struct rt6_info *rt;
read_lock_bh(&table->tb6_lock);
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = fn->leaf;
rt = rt6_device_match(net, rt, fl->oif, flags);
BACKTRACK(net, &fl->fl6_src);
dst_use(&rt->u.dst, jiffies);
read_unlock_bh(&table->tb6_lock);
return rt;
}
struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
struct in6_addr *saddr, int oif, int strict)
{
struct flowi fl = {
.oif = oif,
.nl_u = {
.ip6_u = {
.daddr = *daddr,
},
},
};
struct dst_entry *dst;
int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
if (saddr) {
memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
dst_release(dst);
/* ip6_ins_rt is called with FREE table->tb6_lock.
It takes new route entry, the addition fails by any reason the
route is freed. In any case, if caller does not hold it, it may
be destroyed.
*/
static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
err = fib6_add(&table->tb6_root, rt, info);
int ip6_ins_rt(struct rt6_info *rt)
{
.nl_net = rt->rt6i_dev->nd_net,
return __ip6_ins_rt(rt, &info);
static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
struct in6_addr *saddr)
{
struct rt6_info *rt;
/*
* Clone the route.
*/
rt = ip6_rt_copy(ort);
if (rt) {
if (!(rt->rt6i_flags&RTF_GATEWAY)) {
if (rt->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
rt->u.dst.flags |= DST_HOST;
#ifdef CONFIG_IPV6_SUBTREES
if (rt->rt6i_src.plen && saddr) {
ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
rt->rt6i_src.plen = 128;
}
#endif
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
return rt;
}
static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
{
struct rt6_info *rt = ip6_rt_copy(ort);
if (rt) {
ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
rt->u.dst.flags |= DST_HOST;
rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
}
return rt;
}
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
struct flowi *fl, int flags)
struct rt6_info *rt, *nrt;
int err;
int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
strict |= flags & RT6_LOOKUP_F_IFACE;

YOSHIFUJI Hideaki
committed
restart_2:
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
rt = rt6_select(fn, oif, strict | reachable);
BACKTRACK(net, &fl->fl6_src);
if (rt == net->ipv6.ip6_null_entry ||

YOSHIFUJI Hideaki
committed
rt->rt6i_flags & RTF_CACHE)

YOSHIFUJI Hideaki
committed
goto out;

YOSHIFUJI Hideaki
committed
dst_hold(&rt->u.dst);

YOSHIFUJI Hideaki
committed
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
else {
#if CLONE_OFFLINK_ROUTE
#else
goto out2;
#endif
}
dst_release(&rt->u.dst);
rt = nrt ? : net->ipv6.ip6_null_entry;
dst_hold(&rt->u.dst);
if (nrt) {
if (!err)
if (--attempts <= 0)
goto out2;
/*
* Race condition! In the gap, when table->tb6_lock was
* released someone could insert this route. Relookup.
*/
dst_release(&rt->u.dst);
goto relookup;
out:

YOSHIFUJI Hideaki
committed
if (reachable) {
reachable = 0;
goto restart_2;
}
dst_hold(&rt->u.dst);
out2:
rt->u.dst.lastuse = jiffies;
rt->u.dst.__use++;
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
struct flowi *fl, int flags)
{
return ip6_pol_route(net, table, fl->iif, fl, flags);
void ip6_route_input(struct sk_buff *skb)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = skb->dev->nd_net;
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
.nl_u = {
.ip6_u = {
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
if (rt6_need_strict(&iph->daddr))
flags |= RT6_LOOKUP_F_IFACE;
skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
return ip6_pol_route(net, table, fl->oif, fl, flags);
struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
struct flowi *fl)
{
int flags = 0;
if (rt6_need_strict(&fl->fl6_dst))
flags |= RT6_LOOKUP_F_IFACE;
if (!ipv6_addr_any(&fl->fl6_src))
flags |= RT6_LOOKUP_F_HAS_SADDR;
return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
EXPORT_SYMBOL(ip6_route_output);
int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
{
struct rt6_info *ort = (struct rt6_info *) *dstp;
struct rt6_info *rt = (struct rt6_info *)
dst_alloc(&ip6_dst_blackhole_ops);
struct dst_entry *new = NULL;
if (rt) {
new = &rt->u.dst;
atomic_set(&new->__refcnt, 1);
new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
new->dev = ort->u.dst.dev;
if (new->dev)
dev_hold(new->dev);
rt->rt6i_idev = ort->rt6i_idev;
if (rt->rt6i_idev)
in6_dev_hold(rt->rt6i_idev);
rt->rt6i_expires = 0;
ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
rt->rt6i_metric = 0;
memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
dst_free(new);
}
dst_release(*dstp);
*dstp = new;
return (new ? 0 : -ENOMEM);
}
EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
/*
* Destination cache support functions
*/
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
struct rt6_info *rt;
rt = (struct rt6_info *) dst;
if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
return dst;
return NULL;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *) dst;
if (rt) {
if (rt->rt6i_flags & RTF_CACHE)
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
else
dst_release(dst);
}
return NULL;
}
static void ip6_link_failure(struct sk_buff *skb)
{
struct rt6_info *rt;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
rt = (struct rt6_info *) skb->dst;
if (rt) {
if (rt->rt6i_flags&RTF_CACHE) {
dst_set_expires(&rt->u.dst, 0);
rt->rt6i_flags |= RTF_EXPIRES;
} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
rt->rt6i_node->fn_sernum = -1;
}
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
struct rt6_info *rt6 = (struct rt6_info*)dst;
if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
if (mtu < IPV6_MIN_MTU) {
mtu = IPV6_MIN_MTU;
dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
}
dst->metrics[RTAX_MTU-1] = mtu;
call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
}
}
static int ipv6_get_mtu(struct net_device *dev);
static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
{
mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
* Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
* corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
* IPV6_MAXPLEN is also valid and means: "any MSS,
* rely only on pmtu discovery"
*/
if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
mtu = IPV6_MAXPLEN;
return mtu;
}
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
struct in6_addr *addr)
{
struct rt6_info *rt;
struct inet6_dev *idev = in6_dev_get(dev);
struct net *net = dev->nd_net;
rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
if (unlikely(rt == NULL)) {
in6_dev_put(idev);
goto out;
}
dev_hold(dev);
if (neigh)
neigh_hold(neigh);
else
neigh = ndisc_get_neigh(dev, addr);
rt->rt6i_dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_nexthop = neigh;
atomic_set(&rt->u.dst.__refcnt, 1);
rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
rt->u.dst.output = ip6_output;
#if 0 /* there's no chance to use these for ndisc */
rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
? DST_HOST
: 0;
ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
rt->rt6i_dst.plen = 128;
#endif
spin_lock_bh(&icmp6_dst_lock);
rt->u.dst.next = icmp6_dst_gc_list;
icmp6_dst_gc_list = &rt->u.dst;
spin_unlock_bh(&icmp6_dst_lock);
fib6_force_start_gc(net);
int icmp6_dst_gc(int *more)
{
struct dst_entry *dst, *next, **pprev;
int freed;
next = NULL;
spin_lock_bh(&icmp6_dst_lock);
pprev = &icmp6_dst_gc_list;
while ((dst = *pprev) != NULL) {
if (!atomic_read(&dst->__refcnt)) {
*pprev = dst->next;
dst_free(dst);
freed++;
} else {
pprev = &dst->next;
(*more)++;
}
}
spin_unlock_bh(&icmp6_dst_lock);
static int ip6_dst_gc(struct dst_ops *ops)
struct net *net = ops->dst_net;
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;