Skip to content
Snippets Groups Projects
ip6_tunnel.c 43.9 KiB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
/*
Linus Torvalds's avatar
Linus Torvalds committed
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Ville Nuorvala		<vnuorval@tcs.hut.fi>
 *	Yasuyuki Kozakai	<kozakai@linux-ipv6.org>
Linus Torvalds's avatar
Linus Torvalds committed
 *
 *      Based on:
 *      linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
Linus Torvalds's avatar
Linus Torvalds committed
 *
 *      RFC 2473
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/module.h>
#include <linux/capability.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/hash.h>
#include <linux/etherdevice.h>
Linus Torvalds's avatar
Linus Torvalds committed

#include <asm/uaccess.h>
#include <linux/atomic.h>
Linus Torvalds's avatar
Linus Torvalds committed

Linus Torvalds's avatar
Linus Torvalds committed
#include <net/ip.h>
#include <net/ip_tunnels.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
Linus Torvalds's avatar
Linus Torvalds committed

MODULE_AUTHOR("Ville Nuorvala");
MODULE_DESCRIPTION("IPv6 tunneling device");
Linus Torvalds's avatar
Linus Torvalds committed
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("ip6tnl0");
Linus Torvalds's avatar
Linus Torvalds committed

#ifdef IP6_TNL_DEBUG
#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
Linus Torvalds's avatar
Linus Torvalds committed
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif

#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
#define IPV6_TCLASS_SHIFT 20
Linus Torvalds's avatar
Linus Torvalds committed

#define HASH_SIZE_SHIFT  5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
Linus Torvalds's avatar
Linus Torvalds committed

static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/*
 * Map a pair of IPv6 addresses to a bucket index: XOR the two per-address
 * hashes and reduce the result to HASH_SIZE_SHIFT bits with hash_32().
 */
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
	const u32 h1 = ipv6_addr_hash(addr1);
	const u32 h2 = ipv6_addr_hash(addr2);

	return hash_32(h1 ^ h2, HASH_SIZE_SHIFT);
}
Linus Torvalds's avatar
Linus Torvalds committed

static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;
Linus Torvalds's avatar
Linus Torvalds committed

static int ip6_tnl_net_id __read_mostly;
	/* the IPv6 tunnel fallback device */
	struct net_device *fb_tnl_dev;
	/* lists for storing tunnels in use */
	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
	struct ip6_tnl __rcu *tnls_wc[1];
	struct ip6_tnl __rcu **tnls[2];
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
	struct pcpu_tstats sum = { 0 };
	int i;

	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);

		sum.rx_packets += tstats->rx_packets;
		sum.rx_bytes   += tstats->rx_bytes;
		sum.tx_packets += tstats->tx_packets;
		sum.tx_bytes   += tstats->tx_bytes;
	}
	dev->stats.rx_packets = sum.rx_packets;
	dev->stats.rx_bytes   = sum.rx_bytes;
	dev->stats.tx_packets = sum.tx_packets;
	dev->stats.tx_bytes   = sum.tx_bytes;
	return &dev->stats;
}

/*
 * Locking : hash tables are protected by RCU and RTNL
 */
Linus Torvalds's avatar
Linus Torvalds committed

xeb@mail.ru's avatar
xeb@mail.ru committed
struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct dst_entry *dst = t->dst_cache;

	if (dst && dst->obsolete &&
Linus Torvalds's avatar
Linus Torvalds committed
	    dst->ops->check(dst, t->dst_cookie) == NULL) {
		t->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}

	return dst;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL_GPL(ip6_tnl_dst_check);
Linus Torvalds's avatar
Linus Torvalds committed

xeb@mail.ru's avatar
xeb@mail.ru committed
void ip6_tnl_dst_reset(struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
{
	dst_release(t->dst_cache);
	t->dst_cache = NULL;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
Linus Torvalds's avatar
Linus Torvalds committed

xeb@mail.ru's avatar
xeb@mail.ru committed
void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct rt6_info *rt = (struct rt6_info *) dst;
	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
	dst_release(t->dst_cache);
	t->dst_cache = dst;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL_GPL(ip6_tnl_dst_store);
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * ip6_tnl_lookup - fetch tunnel matching the end-point addresses
 *   @remote: the address of the tunnel exit-point
 *   @local: the address of the tunnel entry-point
 *
 * Return:
 *   tunnel matching given end-points if found,
 *   else fallback tunnel if its device is up,
 *   else %NULL
 **/

/*
 * Walk a tunnel chain beginning at @start, following the ->next links
 * through rcu_dereference(). The cursor is a variable named "t" that the
 * macro does not declare itself, so the caller must have one in scope.
 */
#define for_each_ip6_tunnel_rcu(start) \
	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))

Linus Torvalds's avatar
Linus Torvalds committed
static struct ip6_tnl *
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
Linus Torvalds's avatar
Linus Torvalds committed
{
	unsigned int hash = HASH(remote, local);
Linus Torvalds's avatar
Linus Torvalds committed
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

	for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
Linus Torvalds's avatar
Linus Torvalds committed
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr) &&
		    (t->dev->flags & IFF_UP))
			return t;
	}
	t = rcu_dereference(ip6n->tnls_wc[0]);
	if (t && (t->dev->flags & IFF_UP))
Linus Torvalds's avatar
Linus Torvalds committed
		return t;

	return NULL;
}

/**
 * ip6_tnl_bucket - get head of list matching given tunnel parameters
 *   @p: parameters containing tunnel end-points
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Description:
 *   ip6_tnl_bucket() returns the head of the list matching the
Linus Torvalds's avatar
Linus Torvalds committed
 *   &struct in6_addr entries laddr and raddr in @p.
 *
 * Return: head of IPv6 tunnel list
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static struct ip6_tnl __rcu **
xeb@mail.ru's avatar
xeb@mail.ru committed
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
Linus Torvalds's avatar
Linus Torvalds committed
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	unsigned int h = 0;
Linus Torvalds's avatar
Linus Torvalds committed
	int prio = 0;

	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
		prio = 1;
		h = HASH(remote, local);
Linus Torvalds's avatar
Linus Torvalds committed
	}
	return &ip6n->tnls[prio][h];
 * ip6_tnl_link - add tunnel to hash table
Linus Torvalds's avatar
Linus Torvalds committed
 *   @t: tunnel to be added
 **/

static void
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
Linus Torvalds's avatar
Linus Torvalds committed

	rcu_assign_pointer(t->next , rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
 * ip6_tnl_unlink - remove tunnel from hash table
Linus Torvalds's avatar
Linus Torvalds committed
 *   @t: tunnel to be removed
 **/

static void
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl __rcu **tp;
	struct ip6_tnl *iter;

	for (tp = ip6_tnl_bucket(ip6n, &t->parms);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
/*
 * Free a tunnel net_device: the per-CPU counters reached through
 * dev->tstats go first, because they are looked up via @dev, then the
 * net_device itself.
 */
static void ip6_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}

/*
 * Finish setting up an already allocated tunnel device: run the tunnel
 * init routine, register the net_device, copy the resulting device name
 * into the tunnel parameters, attach the rtnl link ops and add the tunnel
 * to the hash table of its network namespace.
 *
 * Returns 0 on success, or the negative value reported by
 * ip6_tnl_dev_init() or register_netdevice().
 */
static int ip6_tnl_create2(struct net_device *dev)
{
	struct ip6_tnl *tnl = netdev_priv(dev);
	struct ip6_tnl_net *ip6n = net_generic(dev_net(dev), ip6_tnl_net_id);
	int err;

	err = ip6_tnl_dev_init(dev);
	if (err < 0)
		return err;

	err = register_netdevice(dev);
	if (err < 0)
		return err;

	strcpy(tnl->parms.name, dev->name);
	dev->rtnl_link_ops = &ip6_link_ops;

	/* matched by the dev_put() in ip6_tnl_dev_uninit() */
	dev_hold(dev);
	ip6_tnl_link(ip6n, tnl);

	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
/**
 * ip6_tnl_create - create a new tunnel
 *   @net: network namespace
 *   @p: tunnel parameters
 *
 * Description:
 *   Create tunnel matching given parameters.
 *
 * Return:
 *   created tunnel or NULL
 **/

xeb@mail.ru's avatar
xeb@mail.ru committed
static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct net_device *dev;
	struct ip6_tnl *t;
	char name[IFNAMSIZ];
	int err;

Linus Torvalds's avatar
Linus Torvalds committed
		strlcpy(name, p->name, IFNAMSIZ);
	dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
Linus Torvalds's avatar
Linus Torvalds committed
	if (dev == NULL)
Linus Torvalds's avatar
Linus Torvalds committed

Linus Torvalds's avatar
Linus Torvalds committed
	t->parms = *p;
	t->net = dev_net(dev);
	err = ip6_tnl_create2(dev);
	if (err < 0)
		goto failed_free;
Linus Torvalds's avatar
Linus Torvalds committed

	ip6_dev_free(dev);
 * ip6_tnl_locate - find or create tunnel matching given parameters
 *   @p: tunnel parameters
Linus Torvalds's avatar
Linus Torvalds committed
 *   @create: != 0 if allowed to create new tunnel if no match found
 *
 * Description:
 *   ip6_tnl_locate() first tries to locate an existing tunnel
Linus Torvalds's avatar
Linus Torvalds committed
 *   based on @parms. If this is unsuccessful, but @create is set a new
 *   tunnel device is created and registered for use.
 *
 * Return:
 *   matching tunnel or NULL
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static struct ip6_tnl *ip6_tnl_locate(struct net *net,
xeb@mail.ru's avatar
xeb@mail.ru committed
		struct __ip6_tnl_parm *p, int create)
Linus Torvalds's avatar
Linus Torvalds committed
{
	const struct in6_addr *remote = &p->raddr;
	const struct in6_addr *local = &p->laddr;
	struct ip6_tnl __rcu **tp;
Linus Torvalds's avatar
Linus Torvalds committed
	struct ip6_tnl *t;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

	for (tp = ip6_tnl_bucket(ip6n, p);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
Linus Torvalds's avatar
Linus Torvalds committed
		if (ipv6_addr_equal(local, &t->parms.laddr) &&
		    ipv6_addr_equal(remote, &t->parms.raddr))
			return t;
Linus Torvalds's avatar
Linus Torvalds committed
	}
	if (!create)
	return ip6_tnl_create(net, p);
 * ip6_tnl_dev_uninit - tunnel device uninitializer
Linus Torvalds's avatar
Linus Torvalds committed
 *   @dev: the device to be destroyed
Linus Torvalds's avatar
Linus Torvalds committed
 * Description:
 *   ip6_tnl_dev_uninit() removes tunnel from its list
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static void
ip6_tnl_dev_uninit(struct net_device *dev)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
Linus Torvalds's avatar
Linus Torvalds committed

	if (dev == ip6n->fb_tnl_dev)
		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
		ip6_tnl_unlink(ip6n, t);
Linus Torvalds's avatar
Linus Torvalds committed
	ip6_tnl_dst_reset(t);
	dev_put(dev);
}

/**
 * parse_tvl_tnl_enc_lim - handle encapsulation limit option
 *   @skb: received socket buffer
 *
 * Return:
 *   0 if none was found,
Linus Torvalds's avatar
Linus Torvalds committed
 *   else index to encapsulation limit
 **/

xeb@mail.ru's avatar
xeb@mail.ru committed
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
Linus Torvalds's avatar
Linus Torvalds committed
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
Linus Torvalds's avatar
Linus Torvalds committed
	__u8 nexthdr = ipv6h->nexthdr;
	__u16 off = sizeof (*ipv6h);

	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
		__u16 optlen = 0;
		struct ipv6_opt_hdr *hdr;
		if (raw + off + sizeof (*hdr) > skb->data &&
		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
			break;

		hdr = (struct ipv6_opt_hdr *) (raw + off);
		if (nexthdr == NEXTHDR_FRAGMENT) {
			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
			if (frag_hdr->frag_off)
				break;
			optlen = 8;
		} else if (nexthdr == NEXTHDR_AUTH) {
			optlen = (hdr->hdrlen + 2) << 2;
		} else {
			optlen = ipv6_optlen(hdr);
		}
		if (nexthdr == NEXTHDR_DEST) {
			__u16 i = off + 2;
			while (1) {
				struct ipv6_tlv_tnl_enc_lim *tel;

				/* No more room for encapsulation limit */
				if (i + sizeof (*tel) > off + optlen)
					break;

				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
				/* return index of option if found and valid */
				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
				    tel->length == 1)
					return i;
				/* else jump to next option */
				if (tel->type)
					i += tel->length + 2;
				else
					i++;
			}
		}
		nexthdr = hdr->nexthdr;
		off += optlen;
	}
	return 0;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * ip6_tnl_err - tunnel error handler
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Description:
 *   ip6_tnl_err() should handle errors in the tunnel according
Linus Torvalds's avatar
Linus Torvalds committed
 *   to the specifications in RFC 2473.
 **/

static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
	    u8 *type, u8 *code, int *msg, __u32 *info, int offset)
Linus Torvalds's avatar
Linus Torvalds committed
{
	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
Linus Torvalds's avatar
Linus Torvalds committed
	struct ip6_tnl *t;
	int rel_msg = 0;
	u8 rel_type = ICMPV6_DEST_UNREACH;
	u8 rel_code = ICMPV6_ADDR_UNREACH;
Linus Torvalds's avatar
Linus Torvalds committed
	__u32 rel_info = 0;
	__u16 len;
	int err = -ENOENT;
Linus Torvalds's avatar
Linus Torvalds committed

	/* If the packet doesn't contain the original IPv6 header we are
	   in trouble since we might need the source address for further
Linus Torvalds's avatar
Linus Torvalds committed
	   processing of the error. */

	rcu_read_lock();
	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
					&ipv6h->saddr)) == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
		goto out;

	if (t->parms.proto != ipproto && t->parms.proto != 0)
		goto out;

Linus Torvalds's avatar
Linus Torvalds committed
		__u32 teli;
		struct ipv6_tlv_tnl_enc_lim *tel;
		__u32 mtu;
	case ICMPV6_DEST_UNREACH:
		net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
				     t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
		rel_msg = 1;
		break;
	case ICMPV6_TIME_EXCEED:
		if ((*code) == ICMPV6_EXC_HOPLIMIT) {
			net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
					     t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
			rel_msg = 1;
		}
		break;
	case ICMPV6_PARAMPROB:
		if ((*code) == ICMPV6_HDR_FIELD)
xeb@mail.ru's avatar
xeb@mail.ru committed
			teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
Linus Torvalds's avatar
Linus Torvalds committed

		if (teli && teli == *info - 2) {
Linus Torvalds's avatar
Linus Torvalds committed
			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
			if (tel->encap_limit == 0) {
				net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
						     t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
				rel_msg = 1;
			}
		} else {
			net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
					     t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
		}
		break;
	case ICMPV6_PKT_TOOBIG:
		mtu = *info - offset;
Linus Torvalds's avatar
Linus Torvalds committed
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
		t->dev->mtu = mtu;

		if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
Linus Torvalds's avatar
Linus Torvalds committed
			rel_type = ICMPV6_PKT_TOOBIG;
			rel_code = 0;
			rel_info = mtu;
			rel_msg = 1;
		}
		break;
	}

	*type = rel_type;
	*code = rel_code;
	*info = rel_info;
	*msg = rel_msg;

out:
	rcu_read_unlock();
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	int err;
	struct sk_buff *skb2;
	const struct iphdr *eiph;
	err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg == 0)
		return 0;

	switch (rel_type) {
	case ICMPV6_DEST_UNREACH:
		if (rel_code != ICMPV6_ADDR_UNREACH)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_HOST_UNREACH;
		break;
	case ICMPV6_PKT_TOOBIG:
		if (rel_code != 0)
			return 0;
		rel_type = ICMP_DEST_UNREACH;
		rel_code = ICMP_FRAG_NEEDED;
		break;
	case NDISC_REDIRECT:
		rel_type = ICMP_REDIRECT;
		rel_code = ICMP_REDIR_HOST;
	default:
		return 0;
	}

	if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
		return 0;

	skb2 = skb_clone(skb, GFP_ATOMIC);
	if (!skb2)
		return 0;

Eric Dumazet's avatar
Eric Dumazet committed
	skb_dst_drop(skb2);

	skb_reset_network_header(skb2);

	/* Try to guess incoming interface */
	rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
				   eiph->saddr, 0,
				   0, 0,
				   IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
	skb2->dev = rt->dst.dev;

	/* route "incoming" packet */
	if (rt->rt_flags & RTCF_LOCAL) {
		ip_rt_put(rt);
		rt = NULL;
		rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
					   eiph->daddr, eiph->saddr,
					   0, 0,
					   IPPROTO_IPIP,
					   RT_TOS(eiph->tos), 0);
		    rt->dst.dev->type != ARPHRD_TUNNEL) {
			if (!IS_ERR(rt))
				ip_rt_put(rt);
		skb_dst_set(skb2, &rt->dst);
	} else {
		ip_rt_put(rt);
		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
				   skb2->dev) ||
Eric Dumazet's avatar
Eric Dumazet committed
		    skb_dst(skb2)->dev->type != ARPHRD_TUNNEL)
			goto out;
	}

	/* change mtu on this route */
	if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
Eric Dumazet's avatar
Eric Dumazet committed
		if (rel_info > dst_mtu(skb_dst(skb2)))
		skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
	if (rel_type == ICMP_REDIRECT)
		skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
	icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
	   u8 type, u8 code, int offset, __be32 info)
	u8 rel_type = type;
	u8 rel_code = code;
	__u32 rel_info = ntohl(info);
	err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
			  &rel_msg, &rel_info, offset);
	if (err < 0)
		return err;

	if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
Linus Torvalds's avatar
Linus Torvalds committed
		struct rt6_info *rt;
		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
Linus Torvalds's avatar
Linus Torvalds committed
		if (!skb2)
Linus Torvalds's avatar
Linus Torvalds committed

Eric Dumazet's avatar
Eric Dumazet committed
		skb_dst_drop(skb2);
Linus Torvalds's avatar
Linus Torvalds committed
		skb_pull(skb2, offset);
		skb_reset_network_header(skb2);
Linus Torvalds's avatar
Linus Torvalds committed

		/* Try to guess incoming interface */
		rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
				NULL, 0, 0);
Linus Torvalds's avatar
Linus Torvalds committed

		if (rt && rt->dst.dev)
			skb2->dev = rt->dst.dev;
Linus Torvalds's avatar
Linus Torvalds committed

		icmpv6_send(skb2, rel_type, rel_code, rel_info);
Linus Torvalds's avatar
Linus Torvalds committed

		ip6_rt_put(rt);
Linus Torvalds's avatar
Linus Torvalds committed

		kfree_skb(skb2);
	}
static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
{
	__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;

	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
	return IP6_ECN_decapsulate(ipv6h, skb);
static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
				       const struct ipv6hdr *ipv6h,
				       struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
{
	if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
		ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
Linus Torvalds's avatar
Linus Torvalds committed

	return IP6_ECN_decapsulate(ipv6h, skb);
Linus Torvalds's avatar
Linus Torvalds committed
}
xeb@mail.ru's avatar
xeb@mail.ru committed
__u32 ip6_tnl_get_cap(struct ip6_tnl *t,
			     const struct in6_addr *laddr,
			     const struct in6_addr *raddr)
{
xeb@mail.ru's avatar
xeb@mail.ru committed
	struct __ip6_tnl_parm *p = &t->parms;
	int ltype = ipv6_addr_type(laddr);
	int rtype = ipv6_addr_type(raddr);
	__u32 flags = 0;

	if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
		flags = IP6_TNL_F_CAP_PER_PACKET;
	} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
		   !((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
		   (!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
		if (ltype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_XMIT;
		if (rtype&IPV6_ADDR_UNICAST)
			flags |= IP6_TNL_F_CAP_RCV;
	}
	return flags;
}
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL(ip6_tnl_get_cap);
/* called with rcu_read_lock() */
xeb@mail.ru's avatar
xeb@mail.ru committed
int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
				  const struct in6_addr *laddr,
				  const struct in6_addr *raddr)
xeb@mail.ru's avatar
xeb@mail.ru committed
	struct __ip6_tnl_parm *p = &t->parms;
	struct net *net = t->net;
	if ((p->flags & IP6_TNL_F_CAP_RCV) ||
	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
		struct net_device *ldev = NULL;
			ldev = dev_get_by_index_rcu(net, p->link);
		if ((ipv6_addr_is_multicast(laddr) ||
		     likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
		    likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
Linus Torvalds's avatar
Linus Torvalds committed
 *   @skb: received socket buffer
 *   @protocol: ethernet protocol ID
 *   @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Return: 0
 **/

static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
		       int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
						   const struct ipv6hdr *ipv6h,
						   struct sk_buff *skb))
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl *t;
	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed

	rcu_read_lock();
Linus Torvalds's avatar
Linus Torvalds committed

	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
					&ipv6h->daddr)) != NULL) {
		struct pcpu_tstats *tstats;

		if (t->parms.proto != ipproto && t->parms.proto != 0) {
			rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
			rcu_read_unlock();
		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
			t->dev->stats.rx_dropped++;
			rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
			goto discard;
		}
		skb_reset_network_header(skb);
		skb->protocol = htons(protocol);
Linus Torvalds's avatar
Linus Torvalds committed
		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
		__skb_tunnel_rx(skb, t->dev, t->net);

		err = dscp_ecn_decapsulate(t, ipv6h, skb);
		if (unlikely(err)) {
			if (log_ecn_error)
				net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n",
						     &ipv6h->saddr,
						     ipv6_get_dsfield(ipv6h));
			if (err > 1) {
				++t->dev->stats.rx_frame_errors;
				++t->dev->stats.rx_errors;
				rcu_read_unlock();
				goto discard;
			}
		}

		tstats = this_cpu_ptr(t->dev->tstats);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;

		netif_rx(skb);
		rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
		return 0;
	}
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
	return 1;

discard:
	kfree_skb(skb);
	return 0;
static int ip4ip6_rcv(struct sk_buff *skb)
{
	return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
			   ip4ip6_dscp_ecn_decapsulate);
static int ip6ip6_rcv(struct sk_buff *skb)
{
	return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
			   ip6ip6_dscp_ecn_decapsulate);
/*
 * Tx options bundled with the storage for one destination options header.
 * init_tel_txopt() writes the tunnel encapsulation limit option into
 * dst_opt and points ops.dst0opt at that buffer.
 */
struct ipv6_tel_txoption {
	struct ipv6_txoptions ops;
	__u8 dst_opt[8];
};
Linus Torvalds's avatar
Linus Torvalds committed

static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
	memset(opt, 0, sizeof(struct ipv6_tel_txoption));
Linus Torvalds's avatar
Linus Torvalds committed

	opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
	opt->dst_opt[3] = 1;
	opt->dst_opt[4] = encap_limit;
	opt->dst_opt[5] = IPV6_TLV_PADN;
	opt->dst_opt[6] = 1;
Linus Torvalds's avatar
Linus Torvalds committed

	opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
	opt->ops.opt_nflen = 8;
 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
Linus Torvalds's avatar
Linus Torvalds committed
 *   @t: the outgoing tunnel device
 *   @hdr: IPv6 header from the incoming packet
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Description:
 *   Avoid trivial tunneling loop by checking that tunnel exit-point
Linus Torvalds's avatar
Linus Torvalds committed
 *   doesn't match source of incoming packet.
 *
Linus Torvalds's avatar
Linus Torvalds committed
 *   1 if conflict,
 *   0 else
 **/

static inline bool
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
Linus Torvalds's avatar
Linus Torvalds committed
{
	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}

xeb@mail.ru's avatar
xeb@mail.ru committed
int ip6_tnl_xmit_ctl(struct ip6_tnl *t)
xeb@mail.ru's avatar
xeb@mail.ru committed
	struct __ip6_tnl_parm *p = &t->parms;
	struct net *net = t->net;
	if (p->flags & IP6_TNL_F_CAP_XMIT) {
		rcu_read_lock();
			ldev = dev_get_by_index_rcu(net, p->link);
		if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
			pr_warn("%s xmit: Local address not yet configured!\n",
				p->name);
		else if (!ipv6_addr_is_multicast(&p->raddr) &&
			 unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
			pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
				p->name);
		rcu_read_unlock();
xeb@mail.ru's avatar
xeb@mail.ru committed
EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl);

Linus Torvalds's avatar
Linus Torvalds committed
/**
 * ip6_tnl_xmit2 - encapsulate packet and send
Linus Torvalds's avatar
Linus Torvalds committed
 *   @skb: the outgoing socket buffer
 *   @dev: the outgoing tunnel device
 *   @dsfield: dscp code for outer header
 *   @fl: flow of tunneled packet
 *   @encap_limit: encapsulation limit
 *   @pmtu: Path MTU is stored if packet is too big
Linus Torvalds's avatar
Linus Torvalds committed
 *
 * Description:
 *   Build new header and do some sanity checks on the packet before sending
 *   it.
 *
 *   -1 fail
 *   %-EMSGSIZE message too big. return mtu in this case.
Linus Torvalds's avatar
Linus Torvalds committed
 **/

static int ip6_tnl_xmit2(struct sk_buff *skb,
			 struct net_device *dev,
			 __u8 dsfield,
			 struct flowi6 *fl6,
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ip6_tnl *t = netdev_priv(dev);
	struct net *net = t->net;
	struct net_device_stats *stats = &t->dev->stats;
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	struct dst_entry *dst = NULL, *ndst = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
	struct net_device *tdev;
	int mtu;
	unsigned int max_headroom = sizeof(struct ipv6hdr);
Linus Torvalds's avatar
Linus Torvalds committed
	u8 proto;
Linus Torvalds's avatar
Linus Torvalds committed

	if (!fl6->flowi6_mark)
		dst = ip6_tnl_dst_check(t);
	if (!dst) {
		ndst = ip6_route_output(net, NULL, fl6);
Linus Torvalds's avatar
Linus Torvalds committed

			goto tx_err_link_failure;
		ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
		if (IS_ERR(ndst)) {
			err = PTR_ERR(ndst);
			ndst = NULL;
			goto tx_err_link_failure;
		}
Linus Torvalds's avatar
Linus Torvalds committed

	tdev = dst->dev;

	if (tdev == dev) {
		stats->collisions++;
		net_warn_ratelimited("%s: Local routing loop detected!\n",
				     t->parms.name);
Linus Torvalds's avatar
Linus Torvalds committed
		goto tx_err_dst_release;
	}
	mtu = dst_mtu(dst) - sizeof (*ipv6h);
Linus Torvalds's avatar
Linus Torvalds committed
		max_headroom += 8;
		mtu -= 8;
	}
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;
Eric Dumazet's avatar
Eric Dumazet committed
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
Linus Torvalds's avatar
Linus Torvalds committed
	if (skb->len > mtu) {
Linus Torvalds's avatar
Linus Torvalds committed
		goto tx_err_dst_release;
	}

	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));