/*
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
 *
 *	$Id$
 *
 *      Based on:
 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
 *
 *      RFC 2473
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

#include <linux/module.h>
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>

#include <asm/uaccess.h>
#include <asm/atomic.h>

Linus Torvalds's avatar
Linus Torvalds committed
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
Linus Torvalds's avatar
Linus Torvalds committed

MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
Linus Torvalds's avatar
Linus Torvalds committed
MODULE_LICENSE("GPL");

/* NOTE(review): presumably the byte size of the destination-options block
 * that carries the tunnel encapsulation limit -- confirm against users. */
#define IPV6_TLV_TEL_DST_SIZE 8

/* Debug tracing; expands to nothing unless IP6_TNL_DEBUG is defined. */
#ifdef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __func__)
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif

/* Bits of the flow-info word that are not part of the flow label. */
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
#define IPV6_TCLASS_SHIFT 20

/* Number of hash buckets; has to stay a power of two because HASH()
 * reduces with "& (HASH_SIZE - 1)". */
#define HASH_SIZE  32

/* Fold the four 32-bit words of the IPv6 address at @addr into a bucket
 * index in [0, HASH_SIZE). @addr is evaluated more than once. */
#define HASH(addr) ((__force u32)((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
		     (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
		    (HASH_SIZE - 1))
Linus Torvalds's avatar
Linus Torvalds committed

/* Forward declarations of device callbacks defined elsewhere in the file. */
static int ip6_fb_tnl_dev_init(struct net_device *dev);
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);

/* Key handed to net_generic() to fetch the per-namespace ip6_tnl_net. */
static int ip6_tnl_net_id;

/* Per-network-namespace tunnel state. */
struct ip6_tnl_net {
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	struct ip6_tnl *tnls_r_l[HASH_SIZE];	/* hashed by remote ^ local */
	struct ip6_tnl *tnls_wc[1];		/* single wildcard slot */
	/* Indexed as tnls[prio][hash] by ip6_tnl_bucket().
	 * NOTE(review): presumably [0] -> tnls_wc and [1] -> tnls_r_l; the
	 * initialisation is outside this view -- confirm. */
	struct ip6_tnl **tnls[2];
};

/* lock for the tunnel lists */
static DEFINE_RWLOCK(ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * ip6_tnl_dst_check - validate the route cached in a tunnel
 *   @t: tunnel whose dst_cache is examined
 *
 * Return:
 *   the cached dst entry (%NULL when nothing is cached). If the entry is
 *   flagged obsolete and its ->check() handler rejects it for the stored
 *   cookie, the cache slot is cleared, the cache's reference is dropped
 *   and %NULL is returned.
 **/
static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
{
	struct dst_entry *dst = t->dst_cache;

	/* nothing cached, or entry not flagged obsolete: hand it back as is */
	if (!dst || !dst->obsolete)
		return dst;

	/* obsolete entry that the route layer still accepts */
	if (dst->ops->check(dst, t->dst_cookie) != NULL)
		return dst;

	t->dst_cache = NULL;
	dst_release(dst);
	return NULL;
}

/**
 * ip6_tnl_dst_reset - drop the route cached in a tunnel
 *   @t: tunnel whose dst_cache is released and cleared
 **/
static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
{
	struct dst_entry *cached = t->dst_cache;

	dst_release(cached);
	t->dst_cache = NULL;
}

/**
 * ip6_tnl_dst_store - replace the route cached in a tunnel
 *   @t: tunnel receiving the new cache entry
 *   @dst: route to cache; it is accessed as a &struct rt6_info
 *
 * Description:
 *   Records the serial number of the route's fib node (0 if it has none)
 *   as the validation cookie, releases the previously cached entry and
 *   stores @dst in its place. No additional reference is taken on @dst.
 **/
static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt->rt6i_node)
		t->dst_cookie = rt->rt6i_node->fn_sernum;
	else
		t->dst_cookie = 0;

	dst_release(t->dst_cache);
	t->dst_cache = dst;
}

/**
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 *   @remote: the address of the tunnel exit-point
 *   @local: the address of the tunnel entry-point
Linus Torvalds's avatar
Linus Torvalds committed
 *
Linus Torvalds's avatar
Linus Torvalds committed
 *   tunnel matching given end-points if found,
 *   else fallback tunnel if its device is up,
Linus Torvalds's avatar
Linus Torvalds committed
 *   else %NULL
 **/

static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
Linus Torvalds's avatar
Linus Torvalds committed
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

	for (t = ip6n->tnls_r_l[h0 ^ h1]; t; t = t->next) {
Linus Torvalds's avatar
Linus Torvalds committed
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}
	if ((t = ip6n->tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
Linus Torvalds's avatar
Linus Torvalds committed
		return t;

	return NULL;
}

/**
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 *   @p: parameters containing tunnel end-points
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Description:
 *   ip6_tnl_bucket() returns the head of the list matching the
Linus Torvalds's avatar
Linus Torvalds committed
 *   &struct in6_addr entries laddr and raddr in @p.
 *
 * Return: head of IPv6 tunnel list
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static struct ip6_tnl **
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct in6_addr *remote = &p->raddr;
	struct in6_addr *local = &p->laddr;
	unsigned h = 0;
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
		h = HASH(remote) ^ HASH(local);
	}
	return &ip6n->tnls[prio][h];
 * ip6_tnl_link - add tunnel to hash table
Linus Torvalds's avatar
Linus Torvalds committed
 *   @t: tunnel to be added
 **/

static void
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms);
Linus Torvalds's avatar
Linus Torvalds committed

	t->next = *tp;
	write_lock_bh(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
	*tp = t;
	write_unlock_bh(&ip6_tnl_lock);
 * ip6_tnl_unlink - remove tunnel from hash table
Linus Torvalds's avatar
Linus Torvalds committed
 *   @t: tunnel to be removed
 **/

static void
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl **tp;

	for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) {
Linus Torvalds's avatar
Linus Torvalds committed
		if (t == *tp) {
			write_lock_bh(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
			*tp = t->next;
			write_unlock_bh(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
			break;
		}
	}
}

/**
 * ip6_tnl_create() - create a new tunnel
 *   @net: network namespace, used to fetch the per-namespace tunnel state
 *   @p: tunnel parameters
 *
 * Description:
 *   Create tunnel matching given parameters.
 *
 * Return:
 *   created tunnel or NULL
 *
 * NOTE(review): several statements of this function appear to be missing
 * from this copy of the file (see the inline notes); restore them from a
 * pristine tree before building.
 **/

static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

Linus Torvalds's avatar
Linus Torvalds committed
	/* NOTE(review): the extra indent suggests the condition guarding this
	 * copy (and any alternative default name) was lost. */
		strlcpy(name, p->name, IFNAMSIZ);
	/* private area of the device is sized to hold a struct ip6_tnl */
	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
Linus Torvalds's avatar
Linus Torvalds committed
	/* NOTE(review): the statement executed on allocation failure is
	 * missing here. */
	if (dev == NULL)
Linus Torvalds's avatar
Linus Torvalds committed

	/* a '%' in the name is a template: let the core pick a free name */
	if (strchr(name, '%')) {
		if (dev_alloc_name(dev, name) < 0)
			goto failed_free;
	}

	dev->init = ip6_tnl_dev_init;
Linus Torvalds's avatar
Linus Torvalds committed
	/* NOTE(review): no visible statement assigns t before this use. */
	t->parms = *p;

	if ((err = register_netdevice(dev)) < 0)
		goto failed_free;

Linus Torvalds's avatar
Linus Torvalds committed
	dev_hold(dev);

	/* NOTE(review): the success return and the end of the function are
	 * missing; as shown, the success path falls into the error label. */
failed_free:
	free_netdev(dev);
 * ip6_tnl_locate - find or create tunnel matching given parameters
 *   @p: tunnel parameters
Linus Torvalds's avatar
Linus Torvalds committed
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
 *   ip6_tnl_locate() first tries to locate an existing tunnel
Linus Torvalds's avatar
Linus Torvalds committed
 *   based on @parms. If this is unsuccessful, but @create is set a new
 *   tunnel device is created and registered for use.
 *
 * Return:
 *   matching tunnel or NULL
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static struct ip6_tnl *ip6_tnl_locate(struct net *net,
		struct ip6_tnl_parm *p, int create)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct in6_addr *remote = &p->raddr;
	struct in6_addr *local = &p->laddr;
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

	for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) {
Linus Torvalds's avatar
Linus Torvalds committed
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr))
			return t;
Linus Torvalds's avatar
Linus Torvalds committed
	}
	if (!create)
	return ip6_tnl_create(net, p);
 * ip6_tnl_dev_uninit - tunnel device uninitializer
Linus Torvalds's avatar
Linus Torvalds committed
 *   @dev: the device to be destroyed
Linus Torvalds's avatar
Linus Torvalds committed
 * Description:
 *   ip6_tnl_dev_uninit() removes tunnel from its list
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static void
ip6_tnl_dev_uninit(struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = dev_net(dev);
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

	if (dev == ip6n->fb_tnl_dev) {
		write_lock_bh(&ip6_tnl_lock);
		ip6n->tnls_wc[0] = NULL;
		write_unlock_bh(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
	} else {
		ip6_tnl_unlink(ip6n, t);
Linus Torvalds's avatar
Linus Torvalds committed
	}
	ip6_tnl_dst_reset(t);
	dev_put(dev);
}

/**
 * parse_tlv_tnl_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *   @raw: start of the IPv6 header to parse, pointing into @skb's data
 *
 * Description:
 *   Walks the extension header chain that follows the IPv6 header at @raw
 *   and scans destination options headers for a tunnel encapsulation
 *   limit option. The walk stops at a non-extension header, at a fragment
 *   header with a non-zero frag_off field, or when the needed bytes
 *   cannot be pulled into the linear area.
 *
 *   The function may call pskb_may_pull(), which can move the packet
 *   data: callers must re-derive any pointers into @skb afterwards.
 *
 * Return:
 *   0 if none was found,
 *   else index to encapsulation limit, relative to @raw
 **/

static __u16
parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
{
	struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
	/* Position of @raw relative to skb->data. pskb_may_pull() may
	 * reallocate the head, so every pointer below is rebuilt from
	 * skb->data + base instead of reusing @raw after a pull. */
	int base = raw - skb->data;
	__u8 nexthdr = ipv6h->nexthdr;
	__u16 off = sizeof (*ipv6h);

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		__u16 optlen = 0;
		__u8 next;
		struct ipv6_opt_hdr *hdr;
		int need = base + off + (int) sizeof (*hdr);

		/* only pull when the header reaches past skb->data */
		if (need > 0 && !pskb_may_pull(skb, need))
			break;

		hdr = (struct ipv6_opt_hdr *) (skb->data + base + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		/* read before the next pull can invalidate hdr */
		next = hdr->nexthdr;

		if (nexthdr == NEXTHDR_DEST) {
			__u16 i = off + 2;

			/* make the whole options area linear before reading
			 * individual TLVs; hdr must not be used after this */
			need = base + off + optlen;
			if (need > 0 && !pskb_may_pull(skb, need))
				break;

			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof (*tel) > off + optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *)
					(skb->data + base + i);
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = next;
		off += optlen;
	}
	return 0;
}

/**
 * ip6_tnl_err - tunnel error handler
 *   @skb: buffer whose data starts with the IPv6 header used to look up
 *         the tunnel
 *   @ipproto: inner protocol; must match the tunnel's parms.proto unless
 *             that is 0
 *   @opt: not referenced by the visible code
 *   @type: in: ICMPv6 type received; out: type to relay
 *   @code: in: ICMPv6 code received; out: code to relay
 *   @msg: out: non-zero if an error should be relayed
 *   @info: in: ICMPv6 info value; out: info value to relay
 *   @offset: subtracted from *@info to derive the tunnel MTU on
 *            ICMPV6_PKT_TOOBIG
 *
 * Description:
 *   ip6_tnl_err() should handle errors in the tunnel according
 *   to the specifications in RFC 2473.
 *
 * NOTE(review): this copy of the function is incomplete: the switch
 * header, the lock acquisition matching the read_unlock() at "out" and
 * the final return are not present. Restore from a pristine tree.
 **/

static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
	    int *type, int *code, int *msg, __u32 *info, int offset)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
	struct ip6_tnl *t;
	int rel_msg = 0;
	int rel_type = ICMPV6_DEST_UNREACH;
	int rel_code = ICMPV6_ADDR_UNREACH;
	__u32 rel_info = 0;
	__u16 len;
	int err = -ENOENT;
Linus Torvalds's avatar
Linus Torvalds committed

	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
	   processing of the error. */

	/* the header's destination is the tunnel's remote end, its source
	   the local end */
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
					&ipv6h->saddr)) == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
		goto out;

	if (t->parms.proto != ipproto && t->parms.proto != 0)
		goto out;

Linus Torvalds's avatar
Linus Torvalds committed
	/* NOTE(review): the case labels below imply a "switch" statement
	   whose header line is missing here. */
		__u32 teli;
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu;
	case ICMPV6_DEST_UNREACH:
		if (net_ratelimit())
			printk(KERN_WARNING
			       "%s: Path to destination invalid "
			       "or inactive!\n", t->parms.name);
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
Linus Torvalds's avatar
Linus Torvalds committed
			if (net_ratelimit())
				printk(KERN_WARNING
				       "%s: Too small hop limit or "
				       "routing loop in tunnel!\n",
Linus Torvalds's avatar
Linus Torvalds committed
				       t->parms.name);
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
		/* NOTE(review): teli is only assigned when the code is
		   ICMPV6_HDR_FIELD but is tested unconditionally below; an
		   initialisation may have been lost. */
		if ((*code) == ICMPV6_HDR_FIELD)
			teli = parse_tlv_tnl_enc_lim(skb, skb->data);
Linus Torvalds's avatar
Linus Torvalds committed

		/* the problem pointer (*info) must point 2 bytes past the
		   start of the encapsulation limit option */
		if (teli && teli == *info - 2) {
Linus Torvalds's avatar
Linus Torvalds committed
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				if (net_ratelimit())
					printk(KERN_WARNING
					       "%s: Too small encapsulation "
					       "limit or routing loop in "
					       "tunnel!\n", t->parms.name);
				rel_msg = 1;
			}
		} else if (net_ratelimit()) {
			printk(KERN_WARNING
			       "%s: Recipient unable to parse tunneled "
			       "packet!\n ", t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		/* reported MTU minus @offset, clamped to IPV6_MIN_MTU,
		   becomes the tunnel device MTU */
		mtu = *info - offset;
Linus Torvalds's avatar
Linus Torvalds committed
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		t->dev->mtu = mtu;

		/* relay only if the packet is larger than the new MTU */
		if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
Linus Torvalds's avatar
Linus Torvalds committed
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	}

	/* hand the relay decision back through the in/out parameters */
	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

	/* NOTE(review): err is never set to a success value in the visible
	   code, and the return statement is missing. */
out:
	read_unlock(&ip6_tnl_lock);
/**
 * ip4ip6_err - relay a tunnel error for an encapsulated IPv4 packet
 *   @skb: buffer handed to the error handler
 *   @opt: passed through to ip6_tnl_err()
 *   @type: ICMPv6 type
 *   @code: ICMPv6 code
 *   @offset: offset of the encapsulated IPv4 header from @skb's data
 *   @info: ICMPv6 info value in network byte order
 *
 * Description:
 *   Asks ip6_tnl_err() whether the error has to be relayed. If so, the
 *   ICMPv6 error is translated to ICMPv4 (host unreachable or
 *   fragmentation needed) and sent for a clone of the inner packet.
 *
 * Return:
 *   the negative value returned by ip6_tnl_err() on failure, else 0
 **/
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   int type, int code, int offset, __be32 info)
{
	int rel_msg = 0;
	int rel_type = type;
	int rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;
	struct sk_buff *skb2;
	struct iphdr *eiph;
	struct flowi fl;
	struct rtable *rt;

	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg == 0)
		return 0;

	/* translate the ICMPv6 error to its ICMPv4 counterpart */
	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

	dst_release(skb2->dst);
	skb2->dst = NULL;
	skb_pull(skb2, offset);
	skb_reset_network_header(skb2);
	/* the clone's network header now is the encapsulated IPv4 header */
	eiph = ip_hdr(skb2);

	/* Try to guess incoming interface */
	memset(&fl, 0, sizeof(fl));
	fl.fl4_dst = eiph->saddr;
	fl.fl4_tos = RT_TOS(eiph->tos);
	fl.proto = IPPROTO_IPIP;
	if (ip_route_output_key(&init_net, &rt, &fl))
		goto out;

	skb2->dev = rt->u.dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		fl.fl4_dst = eiph->daddr;
		fl.fl4_src = eiph->saddr;
		fl.fl4_tos = eiph->tos;
		if (ip_route_output_key(&init_net, &rt, &fl) ||
		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
			ip_rt_put(rt);
			goto out;
		}
		skb2->dst = (struct dst_entry *)rt;
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
		    skb2->dst->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
		if (rel_info > dst_mtu(skb2->dst))
			goto out;

		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
	}

	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));

out:
	/* the clone is owned by this function on every path */
	kfree_skb(skb2);
	return 0;
}

/**
 * ip6ip6_err - relay a tunnel error for an encapsulated IPv6 packet
 *   @skb: buffer handed to the error handler
 *   @opt: passed through to ip6_tnl_err()
 *   @type: ICMPv6 type
 *   @code: ICMPv6 code
 *   @offset: offset of the encapsulated IPv6 header from @skb's data
 *   @info: ICMPv6 info value in network byte order
 *
 * Description:
 *   Asks ip6_tnl_err() whether the error has to be relayed and, if so,
 *   sends the resulting ICMPv6 error for a clone of the inner packet.
 *
 * Return:
 *   the negative value returned by ip6_tnl_err() on failure, else 0
 **/
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   int type, int code, int offset, __be32 info)
{
	int rel_msg = 0;
	int rel_type = type;
	int rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;

	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
		struct rt6_info *rt;
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

		/* without a clone there is nothing to relay */
		if (!skb2)
			return 0;

		dst_release(skb2->dst);
		skb2->dst = NULL;
		skb_pull(skb2, offset);
		skb_reset_network_header(skb2);

		/* Try to guess incoming interface */
		rt = rt6_lookup(&init_net, &ipv6_hdr(skb2)->saddr, NULL, 0, 0);

		if (rt && rt->rt6i_dev)
			skb2->dev = rt->rt6i_dev;

		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);

		if (rt)
			dst_release(&rt->u.dst);

		kfree_skb(skb2);
	}

	return 0;
}

/**
 * ip4ip6_dscp_ecn_decapsulate - propagate DSCP/ECN to the inner IPv4 header
 *   @t: tunnel the packet was received on
 *   @ipv6h: outer IPv6 header
 *   @skb: buffer whose network header is the inner IPv4 header
 *
 * Description:
 *   With IP6_TNL_F_RCV_DSCP_COPY set, the DSCP bits of the outer header
 *   replace those of the inner header while the inner ECN bits are kept.
 *   Independently, congestion experienced (CE) on the outer header is
 *   propagated to the inner header.
 **/
static void ip4ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
					struct ipv6hdr *ipv6h,
					struct sk_buff *skb)
{
	/* outer DS field with the ECN bits stripped, i.e. DSCP only */
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);

	/* test the unmasked outer field: dsfield has no ECN bits left */
	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
		IP_ECN_set_ce(ip_hdr(skb));
}

static void ip6ip6_dscp_ecn_decapsulate(struct ip6_tnl *t,
					struct ipv6hdr *ipv6h,
					struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
{
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
Linus Torvalds's avatar
Linus Torvalds committed

	if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
		IP6_ECN_set_ce(ipv6_hdr(skb));
Linus Torvalds's avatar
Linus Torvalds committed
}
/**
 * ip6_tnl_rcv_ctl - check whether the tunnel may receive packets
 *   @t: tunnel to check
 *
 * Description:
 *   Reception is allowed when IP6_TNL_F_CAP_RCV is set, the local address
 *   is multicast or configured on this node (on the link device when
 *   parms.link is set), and the remote address is not configured on this
 *   node.
 *
 * Return:
 *   1 if reception is allowed,
 *   0 else
 **/
static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t)
{
	struct ip6_tnl_parm *p = &t->parms;
	int ret = 0;

	if (p->flags & IP6_TNL_F_CAP_RCV) {
		struct net_device *ldev = NULL;

		if (p->link)
			ldev = dev_get_by_index(&init_net, p->link);

		if ((ipv6_addr_is_multicast(&p->laddr) ||
		     likely(ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) &&
		    likely(!ipv6_chk_addr(&init_net, &p->raddr, NULL, 0)))
			ret = 1;

		/* balance the reference taken by dev_get_by_index() */
		if (ldev)
			dev_put(ldev);
	}
	return ret;
}

/**
 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
 *   @skb: received socket buffer
 *   @protocol: ethernet protocol ID
 *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
 *
 * Return: 0 when the packet was consumed (delivered or discarded),
 *         1 when no tunnel matched
 *
 * NOTE(review): this copy of the function is incomplete. The body uses
 * "ipproto" and both callers in this file pass four arguments, so a
 * parameter line is missing from the signature; the lock acquisition
 * matching the read_unlock() calls and the closing brace are missing too.
 * Restore from a pristine tree.
 **/

static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
		       void (*dscp_ecn_decapsulate)(struct ip6_tnl *t,
						    struct ipv6hdr *ipv6h,
						    struct sk_buff *skb))
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl *t;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed

Linus Torvalds's avatar
Linus Torvalds committed

	/* the outer source is the tunnel's remote end, the outer
	   destination its local end */
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
					&ipv6h->daddr)) != NULL) {
		if (t->parms.proto != ipproto && t->parms.proto != 0) {
			read_unlock(&ip6_tnl_lock);
			goto discard;
		}

Linus Torvalds's avatar
Linus Torvalds committed
		/* NOTE(review): this block unlocks twice and counts a drop
		   for a policy failure; lines between the two unlocks appear
		   to be missing. */
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			read_unlock(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
			t->stat.rx_dropped++;
			read_unlock(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
			goto discard;
		}
		/* re-target the buffer at the tunnel device as a freshly
		   received packet of the inner protocol */
		secpath_reset(skb);
		skb_reset_network_header(skb);
		skb->protocol = htons(protocol);
Linus Torvalds's avatar
Linus Torvalds committed
		skb->pkt_type = PACKET_HOST;
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		skb->dev = t->dev;
		dst_release(skb->dst);
		skb->dst = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
		/* NOTE(review): dscp_ecn_decapsulate is never called in the
		   visible code. */
		t->stat.rx_packets++;
		t->stat.rx_bytes += skb->len;
		netif_rx(skb);
		read_unlock(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
		return 0;
	}
	read_unlock(&ip6_tnl_lock);
Linus Torvalds's avatar
Linus Torvalds committed
	return 1;

discard:
	kfree_skb(skb);
	return 0;
/**
 * ip4ip6_rcv - receive handler for IPv4 packets tunneled in IPv6
 *   @skb: received socket buffer
 *
 * Return: the value returned by ip6_tnl_rcv()
 **/
static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
			   ip4ip6_dscp_ecn_decapsulate);
}

/**
 * ip6ip6_rcv - receive handler for IPv6 packets tunneled in IPv6
 *   @skb: received socket buffer
 *
 * Return: the value returned by ip6_tnl_rcv()
 **/
static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
			   ip6ip6_dscp_ecn_decapsulate);
}

/* Transmit options together with the storage for the destination options
 * header they point to; filled in by init_tel_txopt(). */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;	/* ops.dst0opt is pointed at dst_opt */
	__u8 dst_opt[8];		/* raw destination options header */
};
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * init_tel_txopt - build tx options carrying a tunnel encapsulation limit
 *   @opt: option block to initialise
 *   @encap_limit: value stored in the encapsulation limit option
 *
 * Description:
 *   Zeroes @opt and fills dst_opt with an 8 byte destination options
 *   header: bytes 0-1 stay zero, bytes 2-4 hold the encapsulation limit
 *   TLV (type, length 1, value) and bytes 5-7 a PadN option of length 1.
 *   ops.dst0opt is pointed at that buffer.
 **/
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3] = 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;

	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
}

 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
Linus Torvalds's avatar
Linus Torvalds committed
 *   @t: the outgoing tunnel device
 *   @hdr: IPv6 header from the incoming packet
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Description:
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
Linus Torvalds's avatar
Linus Torvalds committed
 *   doesn't match source of incoming packet.
 *
Linus Torvalds's avatar
Linus Torvalds committed
 *   1 if conflict,
 *   0 else
 **/

static inline int
ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
Linus Torvalds's avatar
Linus Torvalds committed
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

/**
 * ip6_tnl_xmit_ctl - check whether the tunnel may transmit packets
 *   @t: tunnel to check
 *
 * Description:
 *   Transmission is allowed when IP6_TNL_F_CAP_XMIT is set, the local
 *   address is configured on this node (on the link device when
 *   parms.link is set) and the remote address is multicast or not
 *   configured on this node. A warning is logged for each failed address
 *   check.
 *
 * Return:
 *   1 if transmission is allowed,
 *   0 else
 **/
static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
{
	struct ip6_tnl_parm *p = &t->parms;
	struct net_device *ldev = NULL;
	int allowed = 0;

	if (!(p->flags & IP6_TNL_F_CAP_XMIT))
		return 0;

	if (p->link)
		ldev = dev_get_by_index(&init_net, p->link);

	if (unlikely(!ipv6_chk_addr(&init_net, &p->laddr, ldev, 0))) {
		printk(KERN_WARNING
		       "%s xmit: Local address not yet configured!\n",
		       p->name);
	} else if (!ipv6_addr_is_multicast(&p->raddr) &&
		   unlikely(ipv6_chk_addr(&init_net, &p->raddr, NULL, 0))) {
		printk(KERN_WARNING
		       "%s xmit: Routing loop! "
		       "Remote address found on this node!\n",
		       p->name);
	} else {
		allowed = 1;
	}

	/* balance the reference taken by dev_get_by_index() */
	if (ldev)
		dev_put(ldev);

	return allowed;
}
Linus Torvalds's avatar
Linus Torvalds committed
/**
 * ip6_tnl_xmit2 - encapsulate packet and send
 *   @skb: the outgoing socket buffer
 *   @dev: the outgoing tunnel device
 *   @dsfield: dscp code for outer header
 *   @fl: flow of tunneled packet
 *   @encap_limit: encapsulation limit
 *   @pmtu: Path MTU is stored if packet is too big
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
 * Return:
 *   0 on success
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
 *
 * NOTE(review): this copy of the function is incomplete (see the inline
 * notes): several statements, the declarations of "opt" and "err", and
 * the final return are not present. Restore from a pristine tree.
 **/

static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
			 struct flowi *fl,
			 int encap_limit,
			 __u32 *pmtu)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl *t = netdev_priv(dev);
Linus Torvalds's avatar
Linus Torvalds committed
	struct net_device_stats *stats = &t->stat;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
	struct dst_entry *dst;
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
Linus Torvalds's avatar
Linus Torvalds committed
	u8 proto;
Linus Torvalds's avatar
Linus Torvalds committed
	int pkt_len;

	/* reuse the cached route when it is still valid */
	if ((dst = ip6_tnl_dst_check(t)) != NULL)
		dst_hold(dst);
	/* NOTE(review): the indentation suggests the lines below were the
	   body of an else branch whose framing lines are missing. */
		dst = ip6_route_output(&init_net, NULL, fl);
Linus Torvalds's avatar
Linus Torvalds committed

	/* NOTE(review): the statement guarded by this condition is
	   missing. */
		if (dst->error || xfrm_lookup(&dst, fl, NULL, 0) < 0)
Linus Torvalds's avatar
Linus Torvalds committed

	tdev = dst->dev;

	/* routing the outer packet back into this tunnel would loop */
	if (tdev == dev) {
		stats->collisions++;
		if (net_ratelimit())
			printk(KERN_WARNING
Linus Torvalds's avatar
Linus Torvalds committed
			       "%s: Local routing loop detected!\n",
			       t->parms.name);
		goto tx_err_dst_release;
	}
	/* payload MTU: route MTU minus the outer IPv6 header */
	mtu = dst_mtu(dst) - sizeof (*ipv6h);
Linus Torvalds's avatar
Linus Torvalds committed
	/* NOTE(review): the condition opening this block is missing; the
	   8 bytes match the size of the destination options header built
	   by init_tel_txopt(). */
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
	if (skb->dst)
		skb->dst->ops->update_pmtu(skb->dst, mtu);
Linus Torvalds's avatar
Linus Torvalds committed
	/* NOTE(review): @pmtu is never written in the visible code; the
	   statements reporting the MTU and error code are missing here. */
	if (skb->len > mtu) {
Linus Torvalds's avatar
Linus Torvalds committed
		goto tx_err_dst_release;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom += LL_RESERVED_SPACE(tdev);
	if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
Linus Torvalds's avatar
Linus Torvalds committed
		struct sk_buff *new_skb;
Linus Torvalds's avatar
Linus Torvalds committed
		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
			goto tx_err_dst_release;

		/* keep the socket accounting on the replacement buffer */
		if (skb->sk)
			skb_set_owner_w(new_skb, skb->sk);
		kfree_skb(skb);
		skb = new_skb;
	}
	dst_release(skb->dst);
	skb->dst = dst_clone(dst);

	skb->transport_header = skb->network_header;
Linus Torvalds's avatar
Linus Torvalds committed

	/* NOTE(review): "opt" is not declared and "proto" is not
	   initialised in the visible code. */
	if (encap_limit >= 0) {
		init_tel_txopt(&opt, encap_limit);
		ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL);
	}
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	/* NOTE(review): ipv6h still holds the value read on entry; a
	   re-read after skb_reset_network_header() may be missing. */
	/* first 32-bit word: version 6 plus the flow label from @fl */
	*(__be32*)ipv6h = fl->fl6_flowlabel | htonl(0x60000000);
Linus Torvalds's avatar
Linus Torvalds committed
	dsfield = INET_ECN_encapsulate(0, dsfield);
	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
	ipv6h->hop_limit = t->parms.hop_limit;
	ipv6h->nexthdr = proto;
	ipv6_addr_copy(&ipv6h->saddr, &fl->fl6_src);
	ipv6_addr_copy(&ipv6h->daddr, &fl->fl6_dst);
Linus Torvalds's avatar
Linus Torvalds committed
	nf_reset(skb);
	/* sample the length before ip6_local_out() takes the buffer */
	pkt_len = skb->len;
Herbert Xu's avatar
Herbert Xu committed
	err = ip6_local_out(skb);
Linus Torvalds's avatar
Linus Torvalds committed

	if (net_xmit_eval(err) == 0) {
Linus Torvalds's avatar
Linus Torvalds committed
		stats->tx_bytes += pkt_len;
		stats->tx_packets++;
	} else {
		stats->tx_errors++;
		stats->tx_aborted_errors++;
	}
	/* the reference held on dst moves into the tunnel's cache */
	ip6_tnl_dst_store(t, dst);
	return 0;
tx_err_link_failure:
	stats->tx_carrier_errors++;
	dst_link_failure(skb);
tx_err_dst_release:
	dst_release(dst);
/**
 * ip4ip6_tnl_xmit - transmit an IPv4 packet through the tunnel
 *   @skb: the outgoing socket buffer; its network header is IPv4
 *   @dev: the outgoing tunnel device
 *
 * Description:
 *   Refuses the packet if the tunnel is bound to a protocol other than
 *   IPPROTO_IPIP or if ip6_tnl_xmit_ctl() denies transmission. Otherwise
 *   the tunnel's flow is copied, optionally extended with the inner TOS
 *   as traffic class, and the packet is handed to ip6_tnl_xmit2(). When
 *   that reports -EMSGSIZE an ICMP "fragmentation needed" error carrying
 *   the MTU is sent.
 *
 * Return:
 *   0 on success,
 *   -1 on failure
 **/
static inline int
ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct iphdr  *iph = ip_hdr(skb);
	int encap_limit = -1;
	struct flowi fl;
	__u8 dsfield;
	__u32 mtu;
	int err;

	if (t->parms.proto != IPPROTO_IPIP && t->parms.proto != 0)
		return -1;
	if (!ip6_tnl_xmit_ctl(t))
		return -1;

	/* -1 means no encapsulation limit option */
	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
		encap_limit = t->parms.encap_limit;

	memcpy(&fl, &t->fl, sizeof (fl));
	fl.proto = IPPROTO_IPIP;

	dsfield = ipv4_get_dsfield(iph);

	/* carry the inner TOS in the traffic class bits of the flow info */
	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
		fl.fl6_flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT)
					  & IPV6_TCLASS_MASK;

	err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu);
	if (err == 0)
		return 0;

	/* XXX: send ICMP error even if DF is not set. */
	if (err == -EMSGSIZE)
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(mtu));
	return -1;
}

static inline int
ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	int encap_limit = -1;
	__u16 offset;
	struct flowi fl;
	__u8 dsfield;
	__u32 mtu;
	int err;