Newer
Older
* IPv6 tunneling device
* Ville Nuorvala <vnuorval@tcs.hut.fi>
* Yasuyuki Kozakai <kozakai@linux-ipv6.org>
* linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
*
* RFC 2473
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/sockios.h>
#include <linux/icmp.h>
#include <linux/if.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/if_tunnel.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/route.h>
#include <linux/rtnetlink.h>
#include <linux/netfilter_ipv6.h>
#include <linux/slab.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/ip6_tunnel.h>
#include <net/xfrm.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
MODULE_DESCRIPTION("IPv6 tunneling device");
#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__)
#else
#define IP6_TNL_TRACE(x...) do {;} while(0)
#endif
#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
#define IPV6_TCLASS_SHIFT 20
#define HASH_SIZE_SHIFT 5
#define HASH_SIZE (1 << HASH_SIZE_SHIFT)
static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
{
u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
return hash_32(hash, HASH_SIZE_SHIFT);
}
static int ip6_tnl_dev_init(struct net_device *dev);
static void ip6_tnl_dev_setup(struct net_device *dev);
static struct rtnl_link_ops ip6_link_ops __read_mostly;
static int ip6_tnl_net_id __read_mostly;
struct ip6_tnl_net {
/* the IPv6 tunnel fallback device */
struct net_device *fb_tnl_dev;
/* lists for storing tunnels in use */
struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
struct ip6_tnl __rcu *tnls_wc[1];
struct ip6_tnl __rcu **tnls[2];
/* often modified stats are per cpu, other are shared (netdev->stats) */
struct pcpu_tstats {
unsigned long rx_packets;
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ip6_get_stats(struct net_device *dev)
{
struct pcpu_tstats sum = { 0 };
int i;
for_each_possible_cpu(i) {
const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
sum.rx_packets += tstats->rx_packets;
sum.rx_bytes += tstats->rx_bytes;
sum.tx_packets += tstats->tx_packets;
sum.tx_bytes += tstats->tx_bytes;
}
dev->stats.rx_packets = sum.rx_packets;
dev->stats.rx_bytes = sum.rx_bytes;
dev->stats.tx_packets = sum.tx_packets;
dev->stats.tx_bytes = sum.tx_bytes;
return &dev->stats;
}
* Locking : hash tables are protected by RCU and RTNL
struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
dst->ops->check(dst, t->dst_cookie) == NULL) {
t->dst_cache = NULL;
dst_release(dst);
return NULL;
}
return dst;
}
{
dst_release(t->dst_cache);
t->dst_cache = NULL;
}
void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *) dst;
t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
dst_release(t->dst_cache);
t->dst_cache = dst;
}
* ip6_tnl_lookup - fetch tunnel matching the end-point addresses
* @remote: the address of the tunnel exit-point
* @local: the address of the tunnel entry-point
* else fallback tunnel if its device is up,
#define for_each_ip6_tunnel_rcu(start) \
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
unsigned int hash = HASH(remote, local);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
if (ipv6_addr_equal(local, &t->parms.laddr) &&
ipv6_addr_equal(remote, &t->parms.raddr) &&
(t->dev->flags & IFF_UP))
return t;
}
t = rcu_dereference(ip6n->tnls_wc[0]);
if (t && (t->dev->flags & IFF_UP))
* ip6_tnl_bucket - get head of list matching given tunnel parameters
* @p: parameters containing tunnel end-points
* ip6_tnl_bucket() returns the head of the list matching the
* &struct in6_addr entries laddr and raddr in @p.
*
* Return: head of IPv6 tunnel list
ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
const struct in6_addr *remote = &p->raddr;
const struct in6_addr *local = &p->laddr;
int prio = 0;
if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
prio = 1;
return &ip6n->tnls[prio][h];
* ip6_tnl_link - add tunnel to hash table
ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
rcu_assign_pointer(t->next , rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
* ip6_tnl_unlink - remove tunnel from hash table
* @t: tunnel to be removed
**/
static void
ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
for (tp = ip6_tnl_bucket(ip6n, &t->parms);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
if (t == iter) {
rcu_assign_pointer(*tp, t->next);
static void ip6_dev_free(struct net_device *dev)
{
free_percpu(dev->tstats);
free_netdev(dev);
}
* ip6_tnl_create - create a new tunnel
* @p: tunnel parameters
* @pt: pointer to new tunnel
*
* Description:
* Create tunnel matching given parameters.
static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
{
struct net_device *dev;
struct ip6_tnl *t;
char name[IFNAMSIZ];
int err;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);

Pavel Emelyanov
committed
if (p->name[0])

Pavel Emelyanov
committed
else
sprintf(name, "ip6tnl%%d");
dev = alloc_netdev(sizeof (*t), name, ip6_tnl_dev_setup);
dev_net_set(dev, net);
t = netdev_priv(dev);
err = ip6_tnl_dev_init(dev);
if (err < 0)
goto failed_free;
if ((err = register_netdevice(dev)) < 0)
goto failed_free;
strcpy(t->parms.name, dev->name);
dev->rtnl_link_ops = &ip6_link_ops;
ip6_tnl_link(ip6n, t);
* ip6_tnl_locate - find or create tunnel matching given parameters
* @create: != 0 if allowed to create new tunnel if no match found
*
* Description:
* ip6_tnl_locate() first tries to locate an existing tunnel
* based on @parms. If this is unsuccessful, but @create is set a new
* tunnel device is created and registered for use.
*
* Return:
* matching tunnel or NULL
static struct ip6_tnl *ip6_tnl_locate(struct net *net,
const struct in6_addr *remote = &p->raddr;
const struct in6_addr *local = &p->laddr;
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
for (tp = ip6_tnl_bucket(ip6n, p);
(t = rtnl_dereference(*tp)) != NULL;
tp = &t->next) {
ipv6_addr_equal(remote, &t->parms.raddr))
return t;
return ip6_tnl_create(net, p);
* ip6_tnl_dev_uninit - tunnel device uninitializer
* ip6_tnl_dev_uninit() removes tunnel from its list
ip6_tnl_dev_uninit(struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
struct net *net = dev_net(dev);
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
ip6_tnl_unlink(ip6n, t);
ip6_tnl_dst_reset(t);
dev_put(dev);
}
/**
* parse_tvl_tnl_enc_lim - handle encapsulation limit option
* @skb: received socket buffer
*
* Return:
* 0 if none was found,
__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
__u8 nexthdr = ipv6h->nexthdr;
__u16 off = sizeof (*ipv6h);
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
__u16 optlen = 0;
struct ipv6_opt_hdr *hdr;
if (raw + off + sizeof (*hdr) > skb->data &&
!pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
break;
hdr = (struct ipv6_opt_hdr *) (raw + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
if (frag_hdr->frag_off)
break;
optlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
optlen = (hdr->hdrlen + 2) << 2;
} else {
optlen = ipv6_optlen(hdr);
}
if (nexthdr == NEXTHDR_DEST) {
__u16 i = off + 2;
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
/* No more room for encapsulation limit */
if (i + sizeof (*tel) > off + optlen)
break;
tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
/* return index of option if found and valid */
if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
tel->length == 1)
return i;
/* else jump to next option */
if (tel->type)
i += tel->length + 2;
else
i++;
}
}
nexthdr = hdr->nexthdr;
off += optlen;
}
return 0;
}
* ip6_tnl_err - tunnel error handler
* ip6_tnl_err() should handle errors in the tunnel according
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data;
u8 rel_type = ICMPV6_DEST_UNREACH;
u8 rel_code = ICMPV6_ADDR_UNREACH;
/* If the packet doesn't contain the original IPv6 header we are
in trouble since we might need the source address for further
if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr,
&ipv6h->saddr)) == NULL)
if (t->parms.proto != ipproto && t->parms.proto != 0)
goto out;
switch (*type) {
__u32 teli;
struct ipv6_tlv_tnl_enc_lim *tel;
__u32 mtu;
case ICMPV6_DEST_UNREACH:
net_warn_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
if ((*code) == ICMPV6_EXC_HOPLIMIT) {
net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
rel_msg = 1;
}
break;
case ICMPV6_PARAMPROB:
if ((*code) == ICMPV6_HDR_FIELD)
teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data);
tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
if (tel->encap_limit == 0) {
net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n",
t->parms.name);
} else {
net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
rel_type = ICMPV6_PKT_TOOBIG;
rel_code = 0;
rel_info = mtu;
rel_msg = 1;
}
break;
}
*type = rel_type;
*code = rel_code;
*info = rel_info;
*msg = rel_msg;
out:
return err;
}
static int
ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
int rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
int err;
struct sk_buff *skb2;
struct rtable *rt;
struct flowi4 fl4;
err = ip6_tnl_err(skb, IPPROTO_IPIP, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);
if (err < 0)
return err;
if (rel_msg == 0)
return 0;
switch (rel_type) {
case ICMPV6_DEST_UNREACH:
if (rel_code != ICMPV6_ADDR_UNREACH)
return 0;
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_HOST_UNREACH;
break;
case ICMPV6_PKT_TOOBIG:
if (rel_code != 0)
return 0;
rel_type = ICMP_DEST_UNREACH;
rel_code = ICMP_FRAG_NEEDED;
break;
case NDISC_REDIRECT:
rel_type = ICMP_REDIRECT;
rel_code = ICMP_REDIR_HOST;
default:
return 0;
}
if (!pskb_may_pull(skb, offset + sizeof(struct iphdr)))
return 0;
skb2 = skb_clone(skb, GFP_ATOMIC);
if (!skb2)
return 0;
skb_pull(skb2, offset);
skb_reset_network_header(skb2);
eiph = ip_hdr(skb2);
/* Try to guess incoming interface */
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->saddr, 0,
0, 0,
IPPROTO_IPIP, RT_TOS(eiph->tos), 0);
if (IS_ERR(rt))
goto out;
/* route "incoming" packet */
if (rt->rt_flags & RTCF_LOCAL) {
ip_rt_put(rt);
rt = NULL;
rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL,
eiph->daddr, eiph->saddr,
0, 0,
IPPROTO_IPIP,
RT_TOS(eiph->tos), 0);
if (IS_ERR(rt) ||
rt->dst.dev->type != ARPHRD_TUNNEL) {
if (!IS_ERR(rt))
ip_rt_put(rt);
goto out;
}
skb_dst_set(skb2, &rt->dst);
} else {
ip_rt_put(rt);
if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos,
skb2->dev) ||
goto out;
}
/* change mtu on this route */
if (rel_type == ICMP_DEST_UNREACH && rel_code == ICMP_FRAG_NEEDED) {
goto out;

David S. Miller
committed
skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info);
if (rel_type == ICMP_REDIRECT)

David S. Miller
committed
skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2);
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
out:
kfree_skb(skb2);
return 0;
}
static int
ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
int rel_msg = 0;
u8 rel_type = type;
u8 rel_code = code;
int err;
err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code,
&rel_msg, &rel_info, offset);
if (err < 0)
return err;
if (rel_msg && pskb_may_pull(skb, offset + sizeof(struct ipv6hdr))) {
struct rt6_info *rt;
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
return 0;
skb_reset_network_header(skb2);
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
NULL, 0, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
icmpv6_send(skb2, rel_type, rel_code, rel_info);
return 0;
static void ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
const struct ipv6hdr *ipv6h,
struct sk_buff *skb)
{
__u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK;
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield);
if (INET_ECN_is_ce(dsfield))
IP_ECN_set_ce(ip_hdr(skb));
static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
const struct ipv6hdr *ipv6h,
struct sk_buff *skb)
if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb));
if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h)))
IP6_ECN_set_ce(ipv6_hdr(skb));

Ville Nuorvala
committed
const struct in6_addr *laddr,
const struct in6_addr *raddr)
{

Ville Nuorvala
committed
int ltype = ipv6_addr_type(laddr);
int rtype = ipv6_addr_type(raddr);
__u32 flags = 0;
if (ltype == IPV6_ADDR_ANY || rtype == IPV6_ADDR_ANY) {
flags = IP6_TNL_F_CAP_PER_PACKET;
} else if (ltype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
rtype & (IPV6_ADDR_UNICAST|IPV6_ADDR_MULTICAST) &&
!((ltype|rtype) & IPV6_ADDR_LOOPBACK) &&
(!((ltype|rtype) & IPV6_ADDR_LINKLOCAL) || p->link)) {
if (ltype&IPV6_ADDR_UNICAST)
flags |= IP6_TNL_F_CAP_XMIT;
if (rtype&IPV6_ADDR_UNICAST)
flags |= IP6_TNL_F_CAP_RCV;
}
return flags;
}

Ville Nuorvala
committed

Ville Nuorvala
committed
const struct in6_addr *laddr,
const struct in6_addr *raddr)
int ret = 0;
struct net *net = dev_net(t->dev);

Ville Nuorvala
committed
if ((p->flags & IP6_TNL_F_CAP_RCV) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
(ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_RCV))) {
if (p->link)

Ville Nuorvala
committed
if ((ipv6_addr_is_multicast(laddr) ||
likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
likely(!ipv6_chk_addr(net, raddr, NULL, 0)))
ret = 1;
}
return ret;
}
* ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally
* @protocol: ethernet protocol ID
* @dscp_ecn_decapsulate: the function to decapsulate DSCP code and ECN
static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
__u8 ipproto,
void (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
const struct ipv6hdr *ipv6h,
struct sk_buff *skb))
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
&ipv6h->daddr)) != NULL) {
if (t->parms.proto != ipproto && t->parms.proto != 0) {
goto discard;
}

Ville Nuorvala
committed
if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) {
t->dev->stats.rx_dropped++;

Arnaldo Carvalho de Melo
committed
skb->mac_header = skb->network_header;
skb_reset_network_header(skb);
skb->protocol = htons(protocol);
skb->pkt_type = PACKET_HOST;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
tstats = this_cpu_ptr(t->dev->tstats);
tstats->rx_packets++;
tstats->rx_bytes += skb->len;
__skb_tunnel_rx(skb, t->dev);
dscp_ecn_decapsulate(t, ipv6h, skb);
discard:
kfree_skb(skb);
return 0;
static int ip4ip6_rcv(struct sk_buff *skb)
{
return ip6_tnl_rcv(skb, ETH_P_IP, IPPROTO_IPIP,
ip4ip6_dscp_ecn_decapsulate);
static int ip6ip6_rcv(struct sk_buff *skb)
{
return ip6_tnl_rcv(skb, ETH_P_IPV6, IPPROTO_IPV6,
ip6ip6_dscp_ecn_decapsulate);
struct ipv6_tel_txoption {
struct ipv6_txoptions ops;
__u8 dst_opt[8];
};
static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit)
{
memset(opt, 0, sizeof(struct ipv6_tel_txoption));
opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT;
opt->dst_opt[3] = 1;
opt->dst_opt[4] = encap_limit;
opt->dst_opt[5] = IPV6_TLV_PADN;
opt->dst_opt[6] = 1;
opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt;
opt->ops.opt_nflen = 8;
* ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
* @hdr: IPv6 header from the incoming packet
* Avoid trivial tunneling loop by checking that tunnel exit-point
ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr)
{
return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr);
}
int ret = 0;
struct net *net = dev_net(t->dev);
if (p->flags & IP6_TNL_F_CAP_XMIT) {
struct net_device *ldev = NULL;
if (p->link)
if (unlikely(!ipv6_chk_addr(net, &p->laddr, ldev, 0)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!ipv6_addr_is_multicast(&p->raddr) &&
unlikely(ipv6_chk_addr(net, &p->raddr, NULL, 0)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
ret = 1;
}
return ret;
}
* ip6_tnl_xmit2 - encapsulate packet and send
* @dev: the outgoing tunnel device
* @dsfield: dscp code for outer header
* @fl: flow of tunneled packet
* @encap_limit: encapsulation limit
* @pmtu: Path MTU is stored if packet is too big
*
* Description:
* Build new header and do some sanity checks on the packet before sending
* it.
*
* 0 on success
* -1 fail
* %-EMSGSIZE message too big. return mtu in this case.
static int ip6_tnl_xmit2(struct sk_buff *skb,
struct net_device *dev,
__u8 dsfield,
int encap_limit,
__u32 *pmtu)
struct ip6_tnl *t = netdev_priv(dev);
struct net_device_stats *stats = &t->dev->stats;
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ipv6_tel_txoption opt;
struct dst_entry *dst = NULL, *ndst = NULL;
unsigned int max_headroom = sizeof(struct ipv6hdr);
int err = -1;
if (!fl6->flowi6_mark)
dst = ip6_tnl_dst_check(t);
if (!dst) {
ndst = ip6_route_output(net, NULL, fl6);
goto tx_err_link_failure;
ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0);
if (IS_ERR(ndst)) {
err = PTR_ERR(ndst);
ndst = NULL;
goto tx_err_link_failure;
}
tdev = dst->dev;
if (tdev == dev) {
stats->collisions++;
net_warn_ratelimited("%s: Local routing loop detected!\n",
t->parms.name);
goto tx_err_dst_release;
}
mtu = dst_mtu(dst) - sizeof (*ipv6h);
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
}
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;

David S. Miller
committed
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
*pmtu = mtu;
err = -EMSGSIZE;
goto tx_err_dst_release;
}
/*
* Okay, now see if we can stuff it in the buffer as-is.
*/
max_headroom += LL_RESERVED_SPACE(tdev);
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
goto tx_err_dst_release;
if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
if (fl6->flowi6_mark) {
skb_dst_set(skb, dst);
ndst = NULL;
} else {
skb_dst_set_noref(skb, dst);
}

Arnaldo Carvalho de Melo
committed
skb->transport_header = skb->network_header;
proto = fl6->flowi6_proto;