Skip to content
Snippets Groups Projects
udp.c 37.2 KiB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
/*
 *	UDP over IPv6
 *	Linux INET6 implementation
Linus Torvalds's avatar
Linus Torvalds committed
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
Linus Torvalds's avatar
Linus Torvalds committed
 *
 *	Based on linux/ipv4/udp.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
 *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/skbuff.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <asm/uaccess.h>

#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/ip6_route.h>
#include <net/raw.h>
#include <net/tcp_states.h>
Linus Torvalds's avatar
Linus Torvalds committed
#include <net/ip6_checksum.h>
#include <net/xfrm.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "udp_impl.h"
Linus Torvalds's avatar
Linus Torvalds committed

int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
{
	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
	__be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr;
	__be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
	int sk_ipv6only = ipv6_only_sock(sk);
	int sk2_ipv6only = inet_v6_ipv6only(sk2);
	int addr_type = ipv6_addr_type(sk_rcv_saddr6);
	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;

	/* if both are mapped, treat as IPv4 */
	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
		return (!sk2_ipv6only &&
			(!sk1_rcv_saddr || !sk2_rcv_saddr ||
			  sk1_rcv_saddr == sk2_rcv_saddr));

	if (addr_type2 == IPV6_ADDR_ANY &&
	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
		return 1;

	if (addr_type == IPV6_ADDR_ANY &&
	    !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
		return 1;

	if (sk2_rcv_saddr6 &&
	    ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
		return 1;

	return 0;
}

static unsigned int udp6_portaddr_hash(struct net *net,
				       const struct in6_addr *addr6,
				       unsigned int port)
{
	unsigned int hash, mix = net_hash_mix(net);

	if (ipv6_addr_any(addr6))
		hash = jhash_1word(0, mix);
	else if (ipv6_addr_v4mapped(addr6))
		hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
		hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
int udp_v6_get_port(struct sock *sk, unsigned short snum)
Linus Torvalds's avatar
Linus Torvalds committed
{
Eric Dumazet's avatar
Eric Dumazet committed
	unsigned int hash2_nulladdr =
		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
	unsigned int hash2_partial = 
		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);

	/* precompute partial secondary hash */
Eric Dumazet's avatar
Eric Dumazet committed
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
static void udp_v6_rehash(struct sock *sk)
{
	u16 new_hash = udp6_portaddr_hash(sock_net(sk),
					  &inet6_sk(sk)->rcv_saddr,
					  inet_sk(sk)->inet_num);

	udp_lib_rehash(sk, new_hash);
}

static inline int compute_score(struct sock *sk, struct net *net,
				unsigned short hnum,
				const struct in6_addr *saddr, __be16 sport,
				const struct in6_addr *daddr, __be16 dport,
	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
			sk->sk_family == PF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
		struct inet_sock *inet = inet_sk(sk);

		score = 0;
		if (inet->inet_dport) {
			if (inet->inet_dport != sport)
				return -1;
			score++;
		}
		if (!ipv6_addr_any(&np->rcv_saddr)) {
			if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
				return -1;
			score++;
		}
		if (!ipv6_addr_any(&np->daddr)) {
			if (!ipv6_addr_equal(&np->daddr, saddr))
				return -1;
			score++;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score++;
		}
	}
	return score;
}

#define SCORE2_MAX (1 + 1 + 1)
static inline int compute_score2(struct sock *sk, struct net *net,
				const struct in6_addr *saddr, __be16 sport,
				const struct in6_addr *daddr, unsigned short hnum,
				int dif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
			sk->sk_family == PF_INET6) {
		struct ipv6_pinfo *np = inet6_sk(sk);
		struct inet_sock *inet = inet_sk(sk);

		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
			return -1;
		score = 0;
		if (inet->inet_dport) {
			if (inet->inet_dport != sport)
				return -1;
			score++;
		}
		if (!ipv6_addr_any(&np->daddr)) {
			if (!ipv6_addr_equal(&np->daddr, saddr))
				return -1;
			score++;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score++;
		}
	}
	return score;
}


/* called with read_rcu_lock() */
static struct sock *udp6_lib_lookup2(struct net *net,
		const struct in6_addr *saddr, __be16 sport,
		const struct in6_addr *daddr, unsigned int hnum, int dif,
		struct udp_hslot *hslot2, unsigned int slot2)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	int score, badness;

begin:
	result = NULL;
	badness = -1;
	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
		score = compute_score2(sk, net, saddr, sport,
				      daddr, hnum, dif);
		if (score > badness) {
			result = sk;
			badness = score;
			if (score == SCORE2_MAX)
				goto exact_match;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot2)
		goto begin;

	if (result) {
exact_match:
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score2(result, net, saddr, sport,
				  daddr, hnum, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	return result;
}

static struct sock *__udp6_lib_lookup(struct net *net,
				      const struct in6_addr *saddr, __be16 sport,
				      const struct in6_addr *daddr, __be16 dport,
				      int dif, struct udp_table *udptable)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
Linus Torvalds's avatar
Linus Torvalds committed
	unsigned short hnum = ntohs(dport);
	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
	int score, badness;
	rcu_read_lock();
	if (hslot->count > 10) {
		hash2 = udp6_portaddr_hash(net, daddr, hnum);
		slot2 = hash2 & udptable->mask;
		hslot2 = &udptable->hash2[slot2];
		if (hslot->count < hslot2->count)
			goto begin;

		result = udp6_lib_lookup2(net, saddr, sport,
					  daddr, hnum, dif,
					  hslot2, slot2);
		if (!result) {
			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
			slot2 = hash2 & udptable->mask;
			hslot2 = &udptable->hash2[slot2];
			if (hslot->count < hslot2->count)
				goto begin;

			result = udp6_lib_lookup2(net, saddr, sport,
						  &in6addr_any, hnum, dif,
						  hslot2, slot2);
		}
		rcu_read_unlock();
		return result;
	}
begin:
	result = NULL;
	badness = -1;
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
		if (score > badness) {
			result = sk;
			badness = score;
Linus Torvalds's avatar
Linus Torvalds committed
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
	if (result) {
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		else if (unlikely(compute_score(result, net, hnum, saddr, sport,
					daddr, dport, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
Linus Torvalds's avatar
Linus Torvalds committed
	return result;
}

static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
					  __be16 sport, __be16 dport,
					  struct udp_table *udptable)
	struct ipv6hdr *iph = ipv6_hdr(skb);

	if (unlikely(sk = skb_steal_sock(skb)))
		return sk;
Eric Dumazet's avatar
Eric Dumazet committed
	return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
				 &iph->daddr, dport, inet6_iif(skb),
				 udptable);
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
			     const struct in6_addr *daddr, __be16 dport, int dif)
{
	return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
}
EXPORT_SYMBOL_GPL(udp6_lib_lookup);


Linus Torvalds's avatar
Linus Torvalds committed
/*
 * 	This should be easy, if there is something there we
 * 	return it, otherwise we block.
 */

int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
Linus Torvalds's avatar
Linus Torvalds committed
		  struct msghdr *msg, size_t len,
		  int noblock, int flags, int *addr_len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;
	unsigned int ulen;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	bool slow;
Linus Torvalds's avatar
Linus Torvalds committed

	if (addr_len)
		*addr_len=sizeof(struct sockaddr_in6);

Linus Torvalds's avatar
Linus Torvalds committed
	if (flags & MSG_ERRQUEUE)
		return ipv6_recv_error(sk, msg, len);

	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
		return ipv6_recv_rxpmtu(sk, msg, len);

Linus Torvalds's avatar
Linus Torvalds committed
try_again:
	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				  &peeked, &err);
Linus Torvalds's avatar
Linus Torvalds committed
	if (!skb)
		goto out;

	ulen = skb->len - sizeof(struct udphdr);
	if (len > ulen)
		len = ulen;
	else if (len < ulen)
		msg->msg_flags |= MSG_TRUNC;
Linus Torvalds's avatar
Linus Torvalds committed

	is_udp4 = (skb->protocol == htons(ETH_P_IP));

	 * If checksum is needed at all, try to do it while copying the
	 * data.  If the data is truncated, or if we only want a partial
	 * coverage checksum (UDP-Lite), do it before the copy.
	if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
		if (udp_lib_checksum_complete(skb))
Linus Torvalds's avatar
Linus Torvalds committed
			goto csum_copy_err;
	if (skb_csum_unnecessary(skb))
		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
					      msg->msg_iov,len);
Linus Torvalds's avatar
Linus Torvalds committed
		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
		if (err == -EINVAL)
			goto csum_copy_err;
	}
	if (err)
		goto out_free;

	if (!peeked) {
		if (is_udp4)
			UDP_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INDATAGRAMS, is_udplite);
		else
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INDATAGRAMS, is_udplite);
	}
	sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed

	/* Copy the address. */
	if (msg->msg_name) {
		struct sockaddr_in6 *sin6;
Linus Torvalds's avatar
Linus Torvalds committed
		sin6 = (struct sockaddr_in6 *) msg->msg_name;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = udp_hdr(skb)->source;
Linus Torvalds's avatar
Linus Torvalds committed
		sin6->sin6_flowinfo = 0;
		sin6->sin6_scope_id = 0;

			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
					       &sin6->sin6_addr);
Linus Torvalds's avatar
Linus Torvalds committed
		else {
			ipv6_addr_copy(&sin6->sin6_addr,
				       &ipv6_hdr(skb)->saddr);
Linus Torvalds's avatar
Linus Torvalds committed
			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
				sin6->sin6_scope_id = IP6CB(skb)->iif;
		}

	}
Linus Torvalds's avatar
Linus Torvalds committed
		if (inet->cmsg_flags)
			ip_cmsg_recv(msg, skb);
	} else {
		if (np->rxopt.all)
			datagram_recv_ctl(sk, msg, skb);
Linus Torvalds's avatar
Linus Torvalds committed

	err = len;
Linus Torvalds's avatar
Linus Torvalds committed
	if (flags & MSG_TRUNC)
Linus Torvalds's avatar
Linus Torvalds committed

out_free:
	skb_free_datagram_locked(sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
out:
	return err;

csum_copy_err:
	slow = lock_sock_fast(sk);
	if (!skb_kill_datagram(sk, skb, flags)) {
		if (is_udp4)
			UDP_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INERRORS, is_udplite);
		else
			UDP6_INC_STATS_USER(sock_net(sk),
					UDP_MIB_INERRORS, is_udplite);
	}
	unlock_sock_fast(sk, slow);
Linus Torvalds's avatar
Linus Torvalds committed

	if (flags & MSG_DONTWAIT)
Linus Torvalds's avatar
Linus Torvalds committed
		return -EAGAIN;
	goto try_again;
}

void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		    u8 type, u8 code, int offset, __be32 info,
		    struct udp_table *udptable)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct ipv6_pinfo *np;
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	struct in6_addr *saddr = &hdr->saddr;
	struct in6_addr *daddr = &hdr->daddr;
	struct udphdr *uh = (struct udphdr*)(skb->data+offset);
	struct sock *sk;
	int err;

	sk = __udp6_lib_lookup(dev_net(skb->dev), daddr, uh->dest,
			       saddr, uh->source, inet6_iif(skb), udptable);
Linus Torvalds's avatar
Linus Torvalds committed
	if (sk == NULL)
		return;

	np = inet6_sk(sk);

	if (!icmpv6_err_convert(type, code, &err) && !np->recverr)
		goto out;

	if (sk->sk_state != TCP_ESTABLISHED && !np->recverr)
		goto out;

Linus Torvalds's avatar
Linus Torvalds committed
		ipv6_icmp_error(sk, skb, err, uh->dest, ntohl(info), (u8 *)(uh+1));
Linus Torvalds's avatar
Linus Torvalds committed
	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
	sock_put(sk);
}

static __inline__ void udpv6_err(struct sk_buff *skb,
				 struct inet6_skb_parm *opt, u8 type,
				 u8 code, int offset, __be32 info     )
	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
}

int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct udp_sock *up = udp_sk(sk);
	int is_udplite = IS_UDPLITE(sk);
	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto drop;
Linus Torvalds's avatar
Linus Torvalds committed

	/*
	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
	 */
	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {

		if (up->pcrlen == 0) {          /* full coverage was set  */
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
				" %d while full coverage %d requested\n",
				UDP_SKB_CB(skb)->cscov, skb->len);
			goto drop;
		}
		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
						    "too small, need min %d\n",
				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
			goto drop;
		}
	if (rcu_dereference_raw(sk->sk_filter)) {
		if (udp_lib_checksum_complete(skb))
			goto drop;
	}
	if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
		/* Note that an ENOMEM error is charged twice */
		if (rc == -ENOMEM)
			UDP6_INC_STATS_BH(sock_net(sk),
					UDP_MIB_RCVBUFERRORS, is_udplite);
		goto drop_no_sk_drops_inc;
Linus Torvalds's avatar
Linus Torvalds committed
	}
Linus Torvalds's avatar
Linus Torvalds committed
	return 0;
	atomic_inc(&sk->sk_drops);
drop_no_sk_drops_inc:
	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
	kfree_skb(skb);
	return -1;
static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
				      __be16 loc_port, struct in6_addr *loc_addr,
				      __be16 rmt_port, struct in6_addr *rmt_addr,
Linus Torvalds's avatar
Linus Torvalds committed
				      int dif)
{
	struct hlist_nulls_node *node;
Linus Torvalds's avatar
Linus Torvalds committed
	struct sock *s = sk;
	unsigned short num = ntohs(loc_port);

	sk_nulls_for_each_from(s, node) {
Linus Torvalds's avatar
Linus Torvalds committed
		struct inet_sock *inet = inet_sk(s);

		if (!net_eq(sock_net(s), net))
		if (udp_sk(s)->udp_port_hash == num &&
		    s->sk_family == PF_INET6) {
Linus Torvalds's avatar
Linus Torvalds committed
			struct ipv6_pinfo *np = inet6_sk(s);
			if (inet->inet_dport) {
				if (inet->inet_dport != rmt_port)
Linus Torvalds's avatar
Linus Torvalds committed
					continue;
			}
			if (!ipv6_addr_any(&np->daddr) &&
			    !ipv6_addr_equal(&np->daddr, rmt_addr))
				continue;

			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
				continue;

			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
					continue;
Linus Torvalds's avatar
Linus Torvalds committed
			}
			if (!inet6_mc_check(s, loc_addr, rmt_addr))
Linus Torvalds's avatar
Linus Torvalds committed
				continue;
			return s;
		}
	}
	return NULL;
}

static void flush_stack(struct sock **stack, unsigned int count,
			struct sk_buff *skb, unsigned int final)
{
	unsigned int i;
	struct sock *sk;
	struct sk_buff *skb1;

	for (i = 0; i < count; i++) {
		skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);

			if (sk_rcvqueues_full(sk, skb)) {
				kfree_skb(skb1);
				goto drop;
			}
			bh_lock_sock(sk);
			if (!sock_owned_by_user(sk))
				udpv6_queue_rcv_skb(sk, skb1);
Zhu Yi's avatar
Zhu Yi committed
			else if (sk_add_backlog(sk, skb1)) {
Zhu Yi's avatar
Zhu Yi committed
				kfree_skb(skb1);
				bh_unlock_sock(sk);
				goto drop;
			}
			bh_unlock_sock(sk);
Zhu Yi's avatar
Zhu Yi committed
			continue;
Zhu Yi's avatar
Zhu Yi committed
drop:
		atomic_inc(&sk->sk_drops);
		UDP6_INC_STATS_BH(sock_net(sk),
				UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
		UDP6_INC_STATS_BH(sock_net(sk),
				UDP_MIB_INERRORS, IS_UDPLITE(sk));
Linus Torvalds's avatar
Linus Torvalds committed
/*
 * Note: called only from the BH handler context,
 * so we don't need to lock the hashes.
 */
static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
		struct in6_addr *saddr, struct in6_addr *daddr,
		struct udp_table *udptable)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct sock *sk, *stack[256 / sizeof(struct sock *)];
	const struct udphdr *uh = udp_hdr(skb);
	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
Linus Torvalds's avatar
Linus Torvalds committed
	int dif;
	unsigned int i, count = 0;
Linus Torvalds's avatar
Linus Torvalds committed

	spin_lock(&hslot->lock);
	sk = sk_nulls_head(&hslot->head);
	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
	while (sk) {
		stack[count++] = sk;
		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
				       uh->source, saddr, dif);
		if (unlikely(count == ARRAY_SIZE(stack))) {
			if (!sk)
				break;
			flush_stack(stack, count, skb, ~0);
			count = 0;
Linus Torvalds's avatar
Linus Torvalds committed
	}
	/*
	 * before releasing the lock, we must take reference on sockets
	 */
	for (i = 0; i < count; i++)
		sock_hold(stack[i]);

	spin_unlock(&hslot->lock);

	if (count) {
		flush_stack(stack, count, skb, count - 1);

		for (i = 0; i < count; i++)
			sock_put(stack[i]);
	} else {
		kfree_skb(skb);
	}
static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
				 int proto)
	int err;

	UDP_SKB_CB(skb)->partial_cov = 0;
	UDP_SKB_CB(skb)->cscov = skb->len;

	if (proto == IPPROTO_UDPLITE) {
		err = udplite_checksum_init(skb, uh);
		if (err)
			return err;
	}

	if (uh->check == 0) {
		/* RFC 2460 section 8.1 says that we SHOULD log
		   this error. Well, it is reasonable.
		 */
		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
		return 1;
	}
	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
			     skb->len, proto, skb->csum))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	if (!skb_csum_unnecessary(skb))
		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
							 &ipv6_hdr(skb)->daddr,
							 skb->len, proto, 0));
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct net *net = dev_net(skb->dev);
Linus Torvalds's avatar
Linus Torvalds committed
	struct sock *sk;
	struct udphdr *uh;
Linus Torvalds's avatar
Linus Torvalds committed
	struct in6_addr *saddr, *daddr;
	u32 ulen = 0;

	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
Linus Torvalds's avatar
Linus Torvalds committed

	saddr = &ipv6_hdr(skb)->saddr;
	daddr = &ipv6_hdr(skb)->daddr;
Linus Torvalds's avatar
Linus Torvalds committed

	ulen = ntohs(uh->len);
	if (ulen > skb->len)
		goto short_packet;
Linus Torvalds's avatar
Linus Torvalds committed

	if (proto == IPPROTO_UDP) {
		/* UDP validates ulen. */
Linus Torvalds's avatar
Linus Torvalds committed

		/* Check for jumbo payload */
		if (ulen == 0)
			ulen = skb->len;
Linus Torvalds's avatar
Linus Torvalds committed

		if (ulen < sizeof(*uh))
			goto short_packet;
Linus Torvalds's avatar
Linus Torvalds committed

		if (ulen < skb->len) {
			if (pskb_trim_rcsum(skb, ulen))
				goto short_packet;
			saddr = &ipv6_hdr(skb)->saddr;
			daddr = &ipv6_hdr(skb)->daddr;
Linus Torvalds's avatar
Linus Torvalds committed

	if (udp6_csum_init(skb, uh, proto))
		goto discard;

	/*
	 *	Multicast receive code
Linus Torvalds's avatar
Linus Torvalds committed
	 */
	if (ipv6_addr_is_multicast(daddr))
		return __udp6_lib_mcast_deliver(net, skb,
				saddr, daddr, udptable);
Linus Torvalds's avatar
Linus Torvalds committed

	/* Unicast */

Linus Torvalds's avatar
Linus Torvalds committed
	 * check socket cache ... must talk to Alan about his plans
	 * for sock caches... i'll skip this for now.
	 */
	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
Linus Torvalds's avatar
Linus Torvalds committed

	if (sk == NULL) {
		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
			goto discard;

		if (udp_lib_checksum_complete(skb))
Linus Torvalds's avatar
Linus Torvalds committed
			goto discard;
		UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
				proto == IPPROTO_UDPLITE);
Linus Torvalds's avatar
Linus Torvalds committed

		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
Linus Torvalds's avatar
Linus Torvalds committed

		kfree_skb(skb);
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
	}
Linus Torvalds's avatar
Linus Torvalds committed
	/* deliver */
	if (sk_rcvqueues_full(sk, skb)) {
		sock_put(sk);
		goto discard;
	}
	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk))
		udpv6_queue_rcv_skb(sk, skb);
Zhu Yi's avatar
Zhu Yi committed
	else if (sk_add_backlog(sk, skb)) {
Zhu Yi's avatar
Zhu Yi committed
		atomic_inc(&sk->sk_drops);
		bh_unlock_sock(sk);
		sock_put(sk);
		goto discard;
	}
	bh_unlock_sock(sk);
Linus Torvalds's avatar
Linus Torvalds committed
	sock_put(sk);
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed

short_packet:
	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
		       saddr,
		       ntohs(uh->source),
		       ulen,
		       skb->len,
		       daddr,
		       ntohs(uh->dest));
Linus Torvalds's avatar
Linus Torvalds committed

discard:
	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
Linus Torvalds's avatar
Linus Torvalds committed
	kfree_skb(skb);
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
}
static __inline__ int udpv6_rcv(struct sk_buff *skb)
	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
Linus Torvalds's avatar
Linus Torvalds committed
/*
 * Throw away all pending data and cancel the corking. Socket is locked.
 */
static void udp_v6_flush_pending_frames(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	if (up->pending == AF_INET)
		udp_flush_pending_frames(sk);
	else if (up->pending) {
Linus Torvalds's avatar
Linus Torvalds committed
		up->len = 0;
		up->pending = 0;
		ip6_flush_pending_frames(sk);
/**
 * 	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
 * 	@sk: 	socket we are sending on
 * 	@skb: 	sk_buff containing the filled-in UDP header
 * 	        (checksum field must be zeroed out)
 */
static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
				 const struct in6_addr *saddr,
				 const struct in6_addr *daddr, int len)
{
	unsigned int offset;
	struct udphdr *uh = udp_hdr(skb);
	__wsum csum = 0;

	if (skb_queue_len(&sk->sk_write_queue) == 1) {
		/* Only one fragment on the socket.  */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
		offset = skb_transport_offset(skb);
		skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);

		skb->ip_summed = CHECKSUM_NONE;

		skb_queue_walk(&sk->sk_write_queue, skb) {
			csum = csum_add(csum, skb->csum);
		}

		uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP,
					    csum);
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
/*
 *	Sending
 */

static int udp_v6_push_pending_frames(struct sock *sk)
Linus Torvalds's avatar
Linus Torvalds committed
{
	struct sk_buff *skb;
	struct udphdr *uh;
	struct udp_sock  *up = udp_sk(sk);
Linus Torvalds's avatar
Linus Torvalds committed
	struct inet_sock *inet = inet_sk(sk);
	struct flowi *fl = &inet->cork.fl;
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
	__wsum csum = 0;
Linus Torvalds's avatar
Linus Torvalds committed

	/* Grab the skbuff where UDP header space exists. */
	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
		goto out;

	/*
	 * Create a UDP header
	 */
Linus Torvalds's avatar
Linus Torvalds committed
	uh->source = fl->fl_ip_sport;
	uh->dest = fl->fl_ip_dport;
	uh->len = htons(up->len);
	uh->check = 0;

	if (is_udplite)
		csum = udplite_csum_outgoing(sk, skb);
	else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
		udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst,
				     up->len);
		goto send;
	} else
		csum = udp_csum_outgoing(sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed

	/* add protocol-dependent pseudo-header */
	uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
				    up->len, fl->proto, csum   );
Linus Torvalds's avatar
Linus Torvalds committed
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;
Linus Torvalds's avatar
Linus Torvalds committed

Linus Torvalds's avatar
Linus Torvalds committed
	err = ip6_push_pending_frames(sk);
	if (err) {
		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
			UDP6_INC_STATS_USER(sock_net(sk),
					    UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else
		UDP6_INC_STATS_USER(sock_net(sk),
				    UDP_MIB_OUTDATAGRAMS, is_udplite);
Linus Torvalds's avatar
Linus Torvalds committed
out:
	up->len = 0;
	up->pending = 0;
	return err;
}

int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
Linus Torvalds's avatar
Linus Torvalds committed
		  struct msghdr *msg, size_t len)
{
	struct ipv6_txoptions opt_space;
	struct udp_sock *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
	struct in6_addr *daddr, *final_p, final;
Linus Torvalds's avatar
Linus Torvalds committed
	struct ipv6_txoptions *opt = NULL;
	struct ip6_flowlabel *flowlabel = NULL;
Herbert Xu's avatar
Herbert Xu committed
	struct flowi fl;
Linus Torvalds's avatar
Linus Torvalds committed
	struct dst_entry *dst;
	int addr_len = msg->msg_namelen;
	int ulen = len;
	int hlimit = -1;
	int dontfrag = -1;
Linus Torvalds's avatar
Linus Torvalds committed
	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
	int err;
	int is_udplite = IS_UDPLITE(sk);
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
Linus Torvalds's avatar
Linus Torvalds committed

	/* destination address check */
	if (sin6) {
		if (addr_len < offsetof(struct sockaddr, sa_data))
			return -EINVAL;

		switch (sin6->sin6_family) {
		case AF_INET6:
			if (addr_len < SIN6_LEN_RFC2133)
				return -EINVAL;
			daddr = &sin6->sin6_addr;
			break;
		case AF_INET:
			goto do_udp_sendmsg;
		case AF_UNSPEC:
			msg->msg_name = sin6 = NULL;
			msg->msg_namelen = addr_len = 0;
			daddr = NULL;
			break;
		default:
			return -EINVAL;
		}
	} else if (!up->pending) {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = &np->daddr;
Linus Torvalds's avatar
Linus Torvalds committed
		daddr = NULL;

	if (daddr) {
		if (ipv6_addr_v4mapped(daddr)) {
Linus Torvalds's avatar
Linus Torvalds committed
			struct sockaddr_in sin;
			sin.sin_family = AF_INET;
			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
Linus Torvalds's avatar
Linus Torvalds committed
			sin.sin_addr.s_addr = daddr->s6_addr32[3];
			msg->msg_name = &sin;
			msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
			if (__ipv6_only_sock(sk))