    /*
     *	Linux INET6 implementation
     *	FIB front-end.
     *
     *	Authors:
     *	Pedro Roque		<roque@di.fc.ul.pt>
     *
     *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
     *
     *	This program is free software; you can redistribute it and/or
     *      modify it under the terms of the GNU General Public License
     *      as published by the Free Software Foundation; either version
     *      2 of the License, or (at your option) any later version.
     */
    
    /*	Changes:
     *
     *	YOSHIFUJI Hideaki @USAGI
     *		reworked default router selection.
     *		- respect outgoing interface
     *		- select from (probably) reachable routers (i.e.
     *		routers in REACHABLE, STALE, DELAY or PROBE states).
     *		- always select the same router if it is (probably)
     *		reachable.  otherwise, round-robin the list.
     *	Ville Nuorvala
     *		Fixed routing subtrees.
     */
    
    
    #include <linux/capability.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include <linux/errno.h>
    #include <linux/types.h>
    #include <linux/times.h>
    #include <linux/socket.h>
    #include <linux/sockios.h>
    #include <linux/net.h>
    #include <linux/route.h>
    #include <linux/netdevice.h>
    #include <linux/in6.h>
    #include <linux/init.h>
    #include <linux/if_arp.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>
    
    #include <linux/nsproxy.h>
    
    #include <net/net_namespace.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include <net/snmp.h>
    #include <net/ipv6.h>
    #include <net/ip6_fib.h>
    #include <net/ip6_route.h>
    #include <net/ndisc.h>
    #include <net/addrconf.h>
    #include <net/tcp.h>
    #include <linux/rtnetlink.h>
    #include <net/dst.h>
    #include <net/xfrm.h>
    
    #include <net/netevent.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    #include <asm/uaccess.h>
    
    #ifdef CONFIG_SYSCTL
    #include <linux/sysctl.h>
    #endif
    
    /* Set to 3 to get tracing. */
    #define RT6_DEBUG 2
    
    #if RT6_DEBUG >= 3
    #define RDBG(x) printk x
    #define RT6_TRACE(x...) printk(KERN_DEBUG x)
    #else
    #define RDBG(x)
    #define RT6_TRACE(x...) do { ; } while (0)
    #endif
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
    static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
    static struct dst_entry *ip6_negative_advice(struct dst_entry *);
    static void		ip6_dst_destroy(struct dst_entry *);
    static void		ip6_dst_ifdown(struct dst_entry *,
    				       struct net_device *dev, int how);
    
    static int		 ip6_dst_gc(struct dst_ops *ops);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    static int		ip6_pkt_discard(struct sk_buff *skb);
    static int		ip6_pkt_discard_out(struct sk_buff *skb);
    static void		ip6_link_failure(struct sk_buff *skb);
    static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
    
    
    static struct rt6_info *rt6_add_route_info(struct net *net,
    					   struct in6_addr *prefix, int prefixlen,
    
    					   struct in6_addr *gwaddr, int ifindex,
    					   unsigned pref);
    
    static struct rt6_info *rt6_get_route_info(struct net *net,
    					   struct in6_addr *prefix, int prefixlen,
    
    static struct dst_ops ip6_dst_ops_template = {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.family			=	AF_INET6,
    	.protocol		=	__constant_htons(ETH_P_IPV6),
    	.gc			=	ip6_dst_gc,
    	.gc_thresh		=	1024,
    	.check			=	ip6_dst_check,
    	.destroy		=	ip6_dst_destroy,
    	.ifdown			=	ip6_dst_ifdown,
    	.negative_advice	=	ip6_negative_advice,
    	.link_failure		=	ip6_link_failure,
    	.update_pmtu		=	ip6_rt_update_pmtu,
    
    	.local_out		=	ip6_local_out,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.entry_size		=	sizeof(struct rt6_info),
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    };
    
    
    /*
     * update_pmtu hook used by ip6_dst_blackhole_ops: deliberately a no-op,
     * so both arguments are ignored and the entry's metrics are left as is.
     */
    static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
    {
    }
    
    /*
     * dst_ops used for the entries allocated in ip6_dst_blackhole().
     * Shares destroy/check with regular IPv6 routes but installs a no-op
     * update_pmtu handler.
     */
    static struct dst_ops ip6_dst_blackhole_ops = {
    	.family			=	AF_INET6,
    	.protocol		=	__constant_htons(ETH_P_IPV6),
    	.destroy		=	ip6_dst_destroy,
    	.check			=	ip6_dst_check,
    	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
    	.entry_size		=	sizeof(struct rt6_info),
    };
    
    static struct rt6_info ip6_null_entry_template = {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.u = {
    		.dst = {
    			.__refcnt	= ATOMIC_INIT(1),
    			.__use		= 1,
    			.obsolete	= -1,
    			.error		= -ENETUNREACH,
    			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
    			.input		= ip6_pkt_discard,
    			.output		= ip6_pkt_discard_out,
    		}
    	},
    	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
    	.rt6i_metric	= ~(u32) 0,
    	.rt6i_ref	= ATOMIC_INIT(1),
    };
    
    
    #ifdef CONFIG_IPV6_MULTIPLE_TABLES
    
    
    static int ip6_pkt_prohibit(struct sk_buff *skb);
    static int ip6_pkt_prohibit_out(struct sk_buff *skb);
    
    
    /*
     * Template for the "prohibit" route entry (only built with
     * CONFIG_IPV6_MULTIPLE_TABLES): a reject route whose dst carries -EACCES
     * and whose input/output handlers are the ip6_pkt_prohibit pair.
     *
     * NOTE(review): unlike the other entry templates in this file this one
     * is not declared static - confirm whether external linkage is needed.
     */
    struct rt6_info ip6_prohibit_entry_template = {

    	.u = {
    		.dst = {
    			.__refcnt	= ATOMIC_INIT(1),
    			.__use		= 1,
    			.obsolete	= -1,
    			.error		= -EACCES,
    			/* hop-limit metric preset to 255 */
    			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },

    			.input		= ip6_pkt_prohibit,
    			.output		= ip6_pkt_prohibit_out,

    		}
    	},
    	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
    	/* largest possible u32 metric */
    	.rt6i_metric	= ~(u32) 0,
    	.rt6i_ref	= ATOMIC_INIT(1),
    };
    
    
    /*
     * Template for the "blackhole" route entry (only built with
     * CONFIG_IPV6_MULTIPLE_TABLES): a reject route whose dst carries -EINVAL
     * and which hands packets to dst_discard in both directions.
     */
    static struct rt6_info ip6_blk_hole_entry_template = {

    	.u = {
    		.dst = {
    			.__refcnt	= ATOMIC_INIT(1),
    			.__use		= 1,
    			.obsolete	= -1,
    			.error		= -EINVAL,
    			/* hop-limit metric preset to 255 */
    			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },

    			.input		= dst_discard,
    			.output		= dst_discard,

    		}
    	},
    	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
    	/* largest possible u32 metric */
    	.rt6i_metric	= ~(u32) 0,
    	.rt6i_ref	= ATOMIC_INIT(1),
    };
    
    #endif
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /* allocate dst with ip6_dst_ops */
    
    /*
     * Allocate a dst entry from @ops and hand it back typed as rt6_info.
     * Returns whatever dst_alloc() returned, cast to struct rt6_info *;
     * callers in this file check the result for NULL.
     */
    static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
    {
    	return (struct rt6_info *)dst_alloc(ops);
    }
    
    /*
     * dst_ops->destroy hook: drop the inet6_dev reference held in
     * rt6i_idev, if any.  The pointer is cleared before the reference is
     * put so the entry never points at a device it no longer holds.
     */
    static void ip6_dst_destroy(struct dst_entry *dst)
    {
    	struct rt6_info *rt = (struct rt6_info *)dst;
    	struct inet6_dev *idev = rt->rt6i_idev;

    	if (idev != NULL) {
    		rt->rt6i_idev = NULL;
    		in6_dev_put(idev);
    	}
    }
    
    static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
    			   int how)
    {
    	struct rt6_info *rt = (struct rt6_info *)dst;
    	struct inet6_dev *idev = rt->rt6i_idev;
    
    	struct net_device *loopback_dev =
    		dev->nd_net->loopback_dev;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
    		struct inet6_dev *loopback_idev =
    			in6_dev_get(loopback_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (loopback_idev != NULL) {
    			rt->rt6i_idev = loopback_idev;
    			in6_dev_put(idev);
    		}
    	}
    }
    
    /*
     * Return 1 when @rt carries RTF_EXPIRES and its rt6i_expires timestamp
     * already lies in the past relative to jiffies, 0 otherwise.
     */
    static __inline__ int rt6_check_expired(const struct rt6_info *rt)
    {
    	/* Routes without an expiry flag never expire. */
    	if (!(rt->rt6i_flags & RTF_EXPIRES))
    		return 0;

    	return time_after(jiffies, rt->rt6i_expires) ? 1 : 0;
    }
    
    
    /*
     * Non-zero when @daddr is a multicast or link-local address.  Callers
     * in this file use the result to decide whether to add
     * RT6_LOOKUP_F_IFACE to the lookup flags.
     */
    static inline int rt6_need_strict(struct in6_addr *daddr)
    {
    	const int strict_types = IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL;

    	return ipv6_addr_type(daddr) & strict_types;
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	Route lookup. Any table->tb6_lock is implied.
     */
    
    
    static inline struct rt6_info *rt6_device_match(struct net *net,
    						    struct rt6_info *rt,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    						    int oif,
    						    int strict)
    {
    	struct rt6_info *local = NULL;
    	struct rt6_info *sprt;
    
    	if (oif) {
    
    		for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			struct net_device *dev = sprt->rt6i_dev;
    			if (dev->ifindex == oif)
    				return sprt;
    			if (dev->flags & IFF_LOOPBACK) {
    				if (sprt->rt6i_idev == NULL ||
    				    sprt->rt6i_idev->dev->ifindex != oif) {
    					if (strict && oif)
    						continue;
    
    					if (local && (!oif ||
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    						      local->rt6i_idev->dev->ifindex == oif))
    						continue;
    				}
    				local = sprt;
    			}
    		}
    
    		if (local)
    			return local;
    
    		if (strict)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    	return rt;
    }
    
    
    #ifdef CONFIG_IPV6_ROUTER_PREF
    /*
     * Router Reachability Probing: if the next hop of @rt is not in a
     * NUD_VALID state and the last update is older than the interface's
     * rtr_probe_interval, send a neighbour solicitation to the next hop's
     * solicited-node multicast address.  A NULL @rt or a route without a
     * next hop is silently ignored.
     */
    static void rt6_probe(struct rt6_info *rt)
    {
    	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
    	struct in6_addr mcaddr;
    	struct in6_addr *target;
    	int due;

    	/* Unlocked fast path: nothing to probe, or neighbour looks valid. */
    	if (!neigh || (neigh->nud_state & NUD_VALID))
    		return;

    	/*
    	 * Re-check under the neighbour lock and apply the rate limit;
    	 * the timestamp is refreshed before the lock is dropped so that
    	 * the solicitation itself is sent unlocked.
    	 */
    	read_lock_bh(&neigh->lock);
    	due = !(neigh->nud_state & NUD_VALID) &&
    	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
    	if (due)
    		neigh->updated = jiffies;
    	read_unlock_bh(&neigh->lock);

    	if (!due)
    		return;

    	target = (struct in6_addr *)&neigh->primary_key;
    	addrconf_addr_solict_mult(target, &mcaddr);
    	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
    }
    #else
    /* Router preference support is compiled out: probing does nothing. */
    static inline void rt6_probe(struct rt6_info *rt)
    {
    }
    #endif
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Default Router Selection (RFC 2461 6.3.6)
     */
    
    static inline int rt6_check_dev(struct rt6_info *rt, int oif)
    
    {
    	struct net_device *dev = rt->rt6i_dev;
    
    	if (!oif || dev->ifindex == oif)
    
    	if ((dev->flags & IFF_LOOPBACK) &&
    	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
    		return 1;
    	return 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    static inline int rt6_check_neigh(struct rt6_info *rt)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct neighbour *neigh = rt->rt6i_nexthop;
    
    	if (rt->rt6i_flags & RTF_NONEXTHOP ||
    	    !(rt->rt6i_flags & RTF_GATEWAY))
    		m = 1;
    	else if (neigh) {
    
    		read_lock_bh(&neigh->lock);
    		if (neigh->nud_state & NUD_VALID)
    
    #ifdef CONFIG_IPV6_ROUTER_PREF
    		else if (neigh->nud_state & NUD_FAILED)
    			m = 0;
    #endif
    		else
    
    		read_unlock_bh(&neigh->lock);
    
    static int rt6_score_route(struct rt6_info *rt, int oif,
    			   int strict)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	if (!m && (strict & RT6_LOOKUP_F_IFACE))
    
    #ifdef CONFIG_IPV6_ROUTER_PREF
    	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
    #endif
    
    	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
    
    static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
    				   int *mpri, struct rt6_info *match)
    
    	int m;
    
    	if (rt6_check_expired(rt))
    		goto out;
    
    	m = rt6_score_route(rt, oif, strict);
    	if (m < 0)
    		goto out;
    
    	if (m > *mpri) {
    		if (strict & RT6_LOOKUP_F_REACHABLE)
    			rt6_probe(match);
    		*mpri = m;
    		match = rt;
    	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
    		rt6_probe(rt);
    	}
    
    out:
    	return match;
    }
    
    static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
    				     struct rt6_info *rr_head,
    				     u32 metric, int oif, int strict)
    {
    	struct rt6_info *rt, *match;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	match = NULL;
    	for (rt = rr_head; rt && rt->rt6i_metric == metric;
    	     rt = rt->u.dst.rt6_next)
    		match = find_match(rt, oif, strict, &mpri, match);
    	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
    	     rt = rt->u.dst.rt6_next)
    		match = find_match(rt, oif, strict, &mpri, match);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
    {
    	struct rt6_info *match, *rt0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
    		  __FUNCTION__, fn->leaf, oif);
    
    	rt0 = fn->rr_ptr;
    	if (!rt0)
    		fn->rr_ptr = rt0 = fn->leaf;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	    (strict & RT6_LOOKUP_F_REACHABLE)) {
    		struct rt6_info *next = rt0->u.dst.rt6_next;
    
    
    		/* no entries matched; do round-robin */
    
    		if (!next || next->rt6i_metric != rt0->rt6i_metric)
    			next = fn->leaf;
    
    		if (next != rt0)
    			fn->rr_ptr = next;
    
    	RT6_TRACE("%s() => %p\n",
    		  __FUNCTION__, match);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	net = rt0->rt6i_dev->nd_net;
    	return (match ? match : net->ipv6.ip6_null_entry);
    
    #ifdef CONFIG_IPV6_ROUTE_INFO
    int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
    		  struct in6_addr *gwaddr)
    {
    
    	struct route_info *rinfo = (struct route_info *) opt;
    	struct in6_addr prefix_buf, *prefix;
    	unsigned int pref;
    	u32 lifetime;
    	struct rt6_info *rt;
    
    	if (len < sizeof(struct route_info)) {
    		return -EINVAL;
    	}
    
    	/* Sanity check for prefix_len and length */
    	if (rinfo->length > 3) {
    		return -EINVAL;
    	} else if (rinfo->prefix_len > 128) {
    		return -EINVAL;
    	} else if (rinfo->prefix_len > 64) {
    		if (rinfo->length < 2) {
    			return -EINVAL;
    		}
    	} else if (rinfo->prefix_len > 0) {
    		if (rinfo->length < 1) {
    			return -EINVAL;
    		}
    	}
    
    	pref = rinfo->route_pref;
    	if (pref == ICMPV6_ROUTER_PREF_INVALID)
    		pref = ICMPV6_ROUTER_PREF_MEDIUM;
    
    
    	lifetime = ntohl(rinfo->lifetime);
    
    	if (lifetime == 0xffffffff) {
    		/* infinity */
    	} else if (lifetime > 0x7fffffff/HZ) {
    		/* Avoid arithmetic overflow */
    		lifetime = 0x7fffffff/HZ - 1;
    	}
    
    	if (rinfo->length == 3)
    		prefix = (struct in6_addr *)rinfo->prefix;
    	else {
    		/* this function is safe */
    		ipv6_addr_prefix(&prefix_buf,
    				 (struct in6_addr *)rinfo->prefix,
    				 rinfo->prefix_len);
    		prefix = &prefix_buf;
    	}
    
    
    	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
    				dev->ifindex);
    
    		ip6_del_rt(rt);
    
    		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
    
    					pref);
    	else if (rt)
    		rt->rt6i_flags = RTF_ROUTEINFO |
    				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
    
    	if (rt) {
    		if (lifetime == 0xffffffff) {
    			rt->rt6i_flags &= ~RTF_EXPIRES;
    		} else {
    			rt->rt6i_expires = jiffies + HZ * lifetime;
    			rt->rt6i_flags |= RTF_EXPIRES;
    		}
    		dst_release(&rt->u.dst);
    	}
    	return 0;
    }
    #endif
    
    
    #define BACKTRACK(__net, saddr)			\
    
    	if (rt == __net->ipv6.ip6_null_entry) {	\
    
    		struct fib6_node *pn; \
    
    		while (1) { \
    
    			if (fn->fn_flags & RTN_TL_ROOT) \
    				goto out; \
    			pn = fn->parent; \
    			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
    
    				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
    
    			else \
    				fn = pn; \
    			if (fn->fn_flags & RTN_RTINFO) \
    				goto restart; \
    
    static struct rt6_info *ip6_pol_route_lookup(struct net *net,
    					     struct fib6_table *table,
    
    					     struct flowi *fl, int flags)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct fib6_node *fn;
    	struct rt6_info *rt;
    
    
    	read_lock_bh(&table->tb6_lock);
    	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
    restart:
    	rt = fn->leaf;
    
    	rt = rt6_device_match(net, rt, fl->oif, flags);
    	BACKTRACK(net, &fl->fl6_src);
    
    	dst_use(&rt->u.dst, jiffies);
    
    	read_unlock_bh(&table->tb6_lock);
    	return rt;
    
    }
    
    
    struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
    			    struct in6_addr *saddr, int oif, int strict)
    
    {
    	struct flowi fl = {
    		.oif = oif,
    		.nl_u = {
    			.ip6_u = {
    				.daddr = *daddr,
    			},
    		},
    	};
    	struct dst_entry *dst;
    
    	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
    
    	if (saddr) {
    		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
    		flags |= RT6_LOOKUP_F_HAS_SADDR;
    	}
    
    
    	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
    
    	if (dst->error == 0)
    		return (struct rt6_info *) dst;
    
    	dst_release(dst);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return NULL;
    }
    
    
    EXPORT_SYMBOL(rt6_lookup);
    
    
    /* ip6_ins_rt is called with FREE table->tb6_lock.
       It takes a new route entry; if the addition fails for any reason the
       route is freed. In any case, if the caller does not hold it, it may
       be destroyed.
     */
    
    
    static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	int err;
    
    	struct fib6_table *table;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	table = rt->rt6i_table;
    	write_lock_bh(&table->tb6_lock);
    
    	err = fib6_add(&table->tb6_root, rt, info);
    
    	write_unlock_bh(&table->tb6_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	return err;
    }
    
    
    /*
     * Insert @rt into its table using an nl_info that only names the
     * network namespace of the route's device.  Returns the result of
     * __ip6_ins_rt().
     */
    int ip6_ins_rt(struct rt6_info *rt)
    {
    	struct nl_info info = {
    		.nl_net = rt->rt6i_dev->nd_net,
    	};

    	return __ip6_ins_rt(rt, &info);
    }
    
    static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
    				      struct in6_addr *saddr)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct rt6_info *rt;
    
    	/*
    	 *	Clone the route.
    	 */
    
    	rt = ip6_rt_copy(ort);
    
    	if (rt) {
    
    		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
    			if (rt->rt6i_dst.plen != 128 &&
    			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
    				rt->rt6i_flags |= RTF_ANYCAST;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		rt->rt6i_dst.plen = 128;
    		rt->rt6i_flags |= RTF_CACHE;
    		rt->u.dst.flags |= DST_HOST;
    
    #ifdef CONFIG_IPV6_SUBTREES
    		if (rt->rt6i_src.plen && saddr) {
    			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
    			rt->rt6i_src.plen = 128;
    		}
    #endif
    
    		rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
    {
    	struct rt6_info *rt = ip6_rt_copy(ort);
    	if (rt) {
    		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
    		rt->rt6i_dst.plen = 128;
    		rt->rt6i_flags |= RTF_CACHE;
    		rt->u.dst.flags |= DST_HOST;
    		rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
    	}
    	return rt;
    }
    
    
    static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
    				      struct flowi *fl, int flags)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct fib6_node *fn;
    
    	int strict = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	int attempts = 3;
    
    	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	strict |= flags & RT6_LOOKUP_F_IFACE;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    relookup:
    
    	read_lock_bh(&table->tb6_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    restart:
    
    	rt = rt6_select(fn, oif, strict | reachable);
    
    
    	BACKTRACK(net, &fl->fl6_src);
    	if (rt == net->ipv6.ip6_null_entry ||
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	read_unlock_bh(&table->tb6_lock);
    
    	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
    
    		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
    
    		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
    
    	rt = nrt ? : net->ipv6.ip6_null_entry;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		err = ip6_ins_rt(nrt);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			goto out2;
    	}
    
    
    	 * Race condition! In the gap, when table->tb6_lock was
    
    	 * released someone could insert this route.  Relookup.
    	 */
    	dst_release(&rt->u.dst);
    	goto relookup;
    
    out:
    
    	read_unlock_bh(&table->tb6_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    out2:
    	rt->u.dst.lastuse = jiffies;
    	rt->u.dst.__use++;
    
    
    	return rt;
    
    /*
     * Input-path policy lookup callback: delegates to ip6_pol_route() using
     * the flow's incoming interface (fl->iif) as the interface index.
     */
    static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
    					    struct flowi *fl, int flags)
    {
    	return ip6_pol_route(net, table, fl->iif, fl, flags);
    }
    
    void ip6_route_input(struct sk_buff *skb)
    {
    
    	struct ipv6hdr *iph = ipv6_hdr(skb);
    
    	struct net *net = skb->dev->nd_net;
    
    	int flags = RT6_LOOKUP_F_HAS_SADDR;
    
    	struct flowi fl = {
    		.iif = skb->dev->ifindex,
    		.nl_u = {
    			.ip6_u = {
    				.daddr = iph->daddr,
    				.saddr = iph->saddr,
    
    				.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
    
    		.mark = skb->mark,
    
    		.proto = iph->nexthdr,
    	};
    
    
    	if (rt6_need_strict(&iph->daddr))
    		flags |= RT6_LOOKUP_F_IFACE;
    
    	skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
    
    static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
    
    					     struct flowi *fl, int flags)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	return ip6_pol_route(net, table, fl->oif, fl, flags);
    
    /*
     * Route a locally generated flow.  The lookup is interface-strict for
     * multicast/link-local destinations, and RT6_LOOKUP_F_HAS_SADDR is set
     * when the flow already has a non-wildcard source address.  @sk is not
     * used here.  Returns the dst produced by fib6_rule_lookup().
     */
    struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
    				    struct flowi *fl)
    {
    	int flags = 0;

    	if (rt6_need_strict(&fl->fl6_dst))
    		flags |= RT6_LOOKUP_F_IFACE;

    	if (!ipv6_addr_any(&fl->fl6_src))
    		flags |= RT6_LOOKUP_F_HAS_SADDR;

    	return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
    }
    
    EXPORT_SYMBOL(ip6_route_output);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
    {
    	struct rt6_info *ort = (struct rt6_info *) *dstp;
    	struct rt6_info *rt = (struct rt6_info *)
    		dst_alloc(&ip6_dst_blackhole_ops);
    	struct dst_entry *new = NULL;
    
    	if (rt) {
    		new = &rt->u.dst;
    
    		atomic_set(&new->__refcnt, 1);
    		new->__use = 1;
    
    		new->input = dst_discard;
    		new->output = dst_discard;
    
    
    		memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
    		new->dev = ort->u.dst.dev;
    		if (new->dev)
    			dev_hold(new->dev);
    		rt->rt6i_idev = ort->rt6i_idev;
    		if (rt->rt6i_idev)
    			in6_dev_hold(rt->rt6i_idev);
    		rt->rt6i_expires = 0;
    
    		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
    		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
    		rt->rt6i_metric = 0;
    
    		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
    #ifdef CONFIG_IPV6_SUBTREES
    		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
    #endif
    
    		dst_free(new);
    	}
    
    	dst_release(*dstp);
    	*dstp = new;
    	return (new ? 0 : -ENOMEM);
    }
    EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	Destination cache support functions
     */
    
    /*
     * dst_ops->check hook: @dst is still valid only while it is attached to
     * a FIB node whose serial number equals @cookie.  Returns @dst in that
     * case and NULL otherwise (including for a NULL @dst).
     */
    static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
    {
    	struct rt6_info *rt = (struct rt6_info *) dst;

    	if (!rt || !rt->rt6i_node)
    		return NULL;

    	return (rt->rt6i_node->fn_sernum == cookie) ? dst : NULL;
    }
    
    /*
     * dst_ops->negative_advice hook: give up on @dst.  Cached routes
     * (RTF_CACHE) are deleted through ip6_del_rt(); for any other route the
     * reference is simply released.  Always returns NULL.
     */
    static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
    {
    	struct rt6_info *rt = (struct rt6_info *) dst;

    	if (rt) {
    		if (rt->rt6i_flags & RTF_CACHE)
    			ip6_del_rt(rt);
    		else
    			dst_release(dst);
    	}
    	return NULL;
    }
    
    /*
     * dst_ops->link_failure hook: report "address unreachable" for @skb and
     * invalidate the route it used.  Cached routes are made to expire
     * immediately; for a default route attached to a FIB node the node's
     * serial number is set to -1.
     */
    static void ip6_link_failure(struct sk_buff *skb)
    {
    	struct rt6_info *rt;

    	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

    	rt = (struct rt6_info *) skb->dst;
    	if (!rt)
    		return;

    	if (rt->rt6i_flags & RTF_CACHE) {
    		dst_set_expires(&rt->u.dst, 0);
    		rt->rt6i_flags |= RTF_EXPIRES;
    		return;
    	}

    	if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
    		rt->rt6i_node->fn_sernum = -1;
    }
    
    /*
     * dst_ops->update_pmtu hook.  Only host routes (prefix length 128) are
     * updated, and only when @mtu is smaller than the current one.  Values
     * below IPV6_MIN_MTU are clamped to IPV6_MIN_MTU and the ALLFRAG
     * feature bit is set.  PMTU netevent listeners are notified afterwards.
     */
    static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
    {
    	struct rt6_info *rt6 = (struct rt6_info*)dst;

    	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
    		rt6->rt6i_flags |= RTF_MODIFIED;
    		if (mtu < IPV6_MIN_MTU) {
    			mtu = IPV6_MIN_MTU;
    			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
    		}
    		dst->metrics[RTAX_MTU-1] = mtu;
    		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
    	}
    }
    
    static int ipv6_get_mtu(struct net_device *dev);
    
    
    static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
    
    
    	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
    		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/*
    
    	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
    	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
    	 * IPV6_MAXPLEN is also valid and means: "any MSS,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 * rely only on pmtu discovery"
    	 */
    	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
    		mtu = IPV6_MAXPLEN;
    	return mtu;
    }
    
    
    static struct dst_entry *icmp6_dst_gc_list;
    static DEFINE_SPINLOCK(icmp6_dst_lock);
    
    struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				  struct neighbour *neigh,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct rt6_info *rt;
    	struct inet6_dev *idev = in6_dev_get(dev);
    
    	struct net *net = dev->nd_net;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (unlikely(idev == NULL))
    		return NULL;
    
    
    	rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (unlikely(rt == NULL)) {
    		in6_dev_put(idev);
    		goto out;
    	}
    
    	dev_hold(dev);
    	if (neigh)
    		neigh_hold(neigh);
    	else
    		neigh = ndisc_get_neigh(dev, addr);
    
    	rt->rt6i_dev	  = dev;
    	rt->rt6i_idev     = idev;
    	rt->rt6i_nexthop  = neigh;
    	atomic_set(&rt->u.dst.__refcnt, 1);
    	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
    	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
    
    	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
    
    	rt->u.dst.output  = ip6_output;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    #if 0	/* there's no chance to use these for ndisc */
    
    	rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
    				? DST_HOST
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				: 0;
    	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
    	rt->rt6i_dst.plen = 128;
    #endif
    
    
    	spin_lock_bh(&icmp6_dst_lock);
    	rt->u.dst.next = icmp6_dst_gc_list;
    	icmp6_dst_gc_list = &rt->u.dst;
    	spin_unlock_bh(&icmp6_dst_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	fib6_force_start_gc(net);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    out:
    
    int icmp6_dst_gc(int *more)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct dst_entry *dst, *next, **pprev;
    	int freed;
    
    	next = NULL;
    
    	spin_lock_bh(&icmp6_dst_lock);
    	pprev = &icmp6_dst_gc_list;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	while ((dst = *pprev) != NULL) {
    		if (!atomic_read(&dst->__refcnt)) {
    			*pprev = dst->next;
    			dst_free(dst);
    			freed++;
    		} else {
    			pprev = &dst->next;
    			(*more)++;
    		}
    	}
    
    
    	spin_unlock_bh(&icmp6_dst_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return freed;
    }
    
    
    static int ip6_dst_gc(struct dst_ops *ops)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	unsigned long now = jiffies;
    
    	struct net *net = ops->dst_net;
    	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
    	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
    	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;