Skip to content
Snippets Groups Projects
udp.c 36.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	UDP over IPv6
    
     *	Linux INET6 implementation
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     *
     *	Authors:
    
     *	Pedro Roque		<roque@di.fc.ul.pt>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     *
     *	Based on linux/ipv4/udp.c
     *
     *	Fixes:
     *	Hideaki YOSHIFUJI	:	sin6_scope_id support
     *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
     *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
     *					a single port at the same time.
     *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
     *      YOSHIFUJI Hideaki @USAGI:	convert /proc/net/udp6 to seq_file.
     *
     *	This program is free software; you can redistribute it and/or
     *      modify it under the terms of the GNU General Public License
     *      as published by the Free Software Foundation; either version
     *      2 of the License, or (at your option) any later version.
     */
    
    #include <linux/errno.h>
    #include <linux/types.h>
    #include <linux/socket.h>
    #include <linux/sockios.h>
    #include <linux/net.h>
    #include <linux/in6.h>
    #include <linux/netdevice.h>
    #include <linux/if_arp.h>
    #include <linux/ipv6.h>
    #include <linux/icmpv6.h>
    #include <linux/init.h>
    
    #include <linux/module.h>
    
    #include <linux/skbuff.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include <asm/uaccess.h>
    
    #include <net/ndisc.h>
    #include <net/protocol.h>
    #include <net/transp_v6.h>
    #include <net/ip6_route.h>
    #include <net/raw.h>
    
    #include <net/tcp_states.h>
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #include <net/ip6_checksum.h>
    #include <net/xfrm.h>
    
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>
    
    #include "udp_impl.h"
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     * Decide whether the local (receive) addresses of two sockets collide.
     *
     * @sk:  socket whose IPv6 and IPv4 receive addresses are read directly
     * @sk2: socket to compare against; it may have no IPv6 receive address,
     *       in which case it is classified as a v4-mapped address
     *
     * Returns 1 when the two bindings are considered equal, 0 otherwise.
     */
    int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
    {
    	const struct in6_addr *addr6 = &inet6_sk(sk)->rcv_saddr;
    	const struct in6_addr *other_addr6 = inet6_rcv_saddr(sk2);
    	__be32 addr4 = inet_sk(sk)->inet_rcv_saddr;
    	__be32 other_addr4 = inet_rcv_saddr(sk2);
    	int v6only = ipv6_only_sock(sk);
    	int other_v6only = inet_v6_ipv6only(sk2);
    	int type = ipv6_addr_type(addr6);
    	int other_type = other_addr6 ? ipv6_addr_type(other_addr6)
    				     : IPV6_ADDR_MAPPED;
    
    	/* Two v4-mapped addresses are compared as plain IPv4 addresses. */
    	if (type == IPV6_ADDR_MAPPED && other_type == IPV6_ADDR_MAPPED) {
    		if (other_v6only)
    			return 0;
    		if (!addr4 || !other_addr4)
    			return 1;
    		return addr4 == other_addr4;
    	}
    
    	/*
    	 * A wildcard on the other socket matches, unless that socket is
    	 * IPv6-only and ours is v4-mapped.
    	 */
    	if (other_type == IPV6_ADDR_ANY &&
    	    (!other_v6only || type != IPV6_ADDR_MAPPED))
    		return 1;
    
    	/* Same rule with the roles of the two sockets swapped. */
    	if (type == IPV6_ADDR_ANY &&
    	    (!v6only || other_type != IPV6_ADDR_MAPPED))
    		return 1;
    
    	/* Otherwise only an exact IPv6 address match counts. */
    	return other_addr6 && ipv6_addr_equal(addr6, other_addr6);
    }
    
    
    /*
     * Compute the secondary (address + port) hash for a local IPv6 address.
     *
     * @net:   network namespace, mixed into the hash via net_hash_mix()
     * @addr6: local address to hash
     * @port:  port number, XORed into the final value
     *
     * The wildcard address hashes the constant 0, a v4-mapped address hashes
     * only its embedded IPv4 word, and any other address hashes all four
     * 32-bit words.
     * NOTE(review): the port is folded in with XOR, which appears to be why
     * udp_v6_get_port() can precompute a "partial" hash using port 0.
     */
    static unsigned int udp6_portaddr_hash(struct net *net,
    				       const struct in6_addr *addr6,
    				       unsigned int port)
    {
    	unsigned int hash, mix = net_hash_mix(net);
    
    	if (ipv6_addr_any(addr6))
    		hash = jhash_1word(0, mix);
    	else if (ipv6_addr_v4mapped(addr6))
    		hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
    	else
    		hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
    
    	return hash ^ port;
    }
    
    
    int udp_v6_get_port(struct sock *sk, unsigned short snum)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    Eric Dumazet's avatar
    Eric Dumazet committed
    	unsigned int hash2_nulladdr =
    		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
    	unsigned int hash2_partial = 
    		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0);
    
    
    	/* precompute partial secondary hash */
    
    Eric Dumazet's avatar
    Eric Dumazet committed
    	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
    	return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr);
    
    /*
     * Rate how well socket @sk matches an incoming datagram.
     *
     * Returns -1 when the socket cannot receive the datagram (wrong namespace,
     * port or family, or a bound attribute that disagrees with the packet).
     * Otherwise returns the number of bound attributes that matched: remote
     * port, local address, remote address and bound device. @dport is not
     * consulted here; the port comparison uses the host-order @hnum.
     */
    static inline int compute_score(struct sock *sk, struct net *net,
    				unsigned short hnum,
    				struct in6_addr *saddr, __be16 sport,
    				struct in6_addr *daddr, __be16 dport,
    				int dif)
    {
    	struct ipv6_pinfo *np;
    	struct inet_sock *inet;
    	int matched = 0;
    
    	if (!net_eq(sock_net(sk), net) ||
    	    udp_sk(sk)->udp_port_hash != hnum ||
    	    sk->sk_family != PF_INET6)
    		return -1;
    
    	np = inet6_sk(sk);
    	inet = inet_sk(sk);
    
    	/* Connected remote port must equal the packet's source port. */
    	if (inet->inet_dport) {
    		if (inet->inet_dport != sport)
    			return -1;
    		matched++;
    	}
    
    	/* A specific local address must equal the packet's destination. */
    	if (!ipv6_addr_any(&np->rcv_saddr)) {
    		if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
    			return -1;
    		matched++;
    	}
    
    	/* A specific remote address must equal the packet's source. */
    	if (!ipv6_addr_any(&np->daddr)) {
    		if (!ipv6_addr_equal(&np->daddr, saddr))
    			return -1;
    		matched++;
    	}
    
    	/* A device binding must equal the incoming interface. */
    	if (sk->sk_bound_dev_if) {
    		if (sk->sk_bound_dev_if != dif)
    			return -1;
    		matched++;
    	}
    
    	return matched;
    }
    
    
    /* Highest score compute_score2() can return: port + address + device. */
    #define SCORE2_MAX (1 + 1 + 1)
    
    /*
     * Score @sk for the secondary-hash lookup.
     *
     * Unlike compute_score(), the socket's receive address must be exactly
     * @daddr (callers pass the wildcard address to find wildcard binds).
     * Returns -1 on any mismatch, otherwise the count of matching remote
     * port, remote address and bound device (0..SCORE2_MAX).
     */
    static inline int compute_score2(struct sock *sk, struct net *net,
    				const struct in6_addr *saddr, __be16 sport,
    				const struct in6_addr *daddr, unsigned short hnum,
    				int dif)
    {
    	struct ipv6_pinfo *np;
    	struct inet_sock *inet;
    	int matched = 0;
    
    	if (!net_eq(sock_net(sk), net) ||
    	    udp_sk(sk)->udp_port_hash != hnum ||
    	    sk->sk_family != PF_INET6)
    		return -1;
    
    	np = inet6_sk(sk);
    	inet = inet_sk(sk);
    
    	if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
    		return -1;
    
    	if (inet->inet_dport) {
    		if (inet->inet_dport != sport)
    			return -1;
    		matched++;
    	}
    
    	if (!ipv6_addr_any(&np->daddr)) {
    		if (!ipv6_addr_equal(&np->daddr, saddr))
    			return -1;
    		matched++;
    	}
    
    	if (sk->sk_bound_dev_if) {
    		if (sk->sk_bound_dev_if != dif)
    			return -1;
    		matched++;
    	}
    
    	return matched;
    }
    
    
    /*
     * Walk one secondary-hash chain (@hslot2, index @slot2) and return the
     * socket with the highest compute_score2() value, or NULL if none matches.
     * On success a reference has been taken on the returned socket.
     *
     * Called with rcu_read_lock() held.
     */
    static struct sock *udp6_lib_lookup2(struct net *net,
    		const struct in6_addr *saddr, __be16 sport,
    		const struct in6_addr *daddr, unsigned int hnum, int dif,
    		struct udp_hslot *hslot2, unsigned int slot2)
    {
    	struct sock *sk, *result;
    	struct hlist_nulls_node *node;
    	int score, badness;
    
    begin:
    	/* Every (re)start forgets the previous best candidate. */
    	result = NULL;
    	badness = -1;
    	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
    		score = compute_score2(sk, net, saddr, sport,
    				      daddr, hnum, dif);
    		if (score > badness) {
    			result = sk;
    			badness = score;
    			/* Nothing can beat a full match: stop scanning. */
    			if (score == SCORE2_MAX)
    				goto exact_match;
    		}
    	}
    	/*
    	 * if the nulls value we got at the end of this lookup is
    	 * not the expected one, we must restart lookup.
    	 * We probably met an item that was moved to another chain.
    	 */
    	if (get_nulls_value(node) != slot2)
    		goto begin;
    
    	if (result) {
    exact_match:
    		/* A refcount that is already zero cannot be taken: give up. */
    		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
    			result = NULL;
    		/*
    		 * Re-score now that a reference is held; if the socket no
    		 * longer scores as well as when it was picked, drop it and
    		 * start over.
    		 */
    		else if (unlikely(compute_score2(result, net, saddr, sport,
    				  daddr, hnum, dif) < badness)) {
    			sock_put(result);
    			goto begin;
    		}
    	}
    	return result;
    }
    
    
    /*
     * Find the best UDP socket for a datagram from @saddr:@sport to
     * @daddr:@dport received on interface @dif.
     *
     * When the primary (port) chain holds more than 10 sockets, the secondary
     * (address + port) chains are tried first: the one for @daddr, then the
     * one for the wildcard address. If a secondary chain is longer than the
     * primary one, the primary chain is scanned instead.
     *
     * Returns the socket with a reference held, or NULL. Takes and releases
     * rcu_read_lock() itself.
     */
    static struct sock *__udp6_lib_lookup(struct net *net,
    				      struct in6_addr *saddr, __be16 sport,
    				      struct in6_addr *daddr, __be16 dport,
    				      int dif, struct udp_table *udptable)
    {
    	struct sock *sk, *result;
    	struct hlist_nulls_node *node;
    	unsigned short hnum = ntohs(dport);
    	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
    	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
    	int score, badness;
    
    	rcu_read_lock();
    	if (hslot->count > 10) {
    		hash2 = udp6_portaddr_hash(net, daddr, hnum);
    		slot2 = hash2 & udptable->mask;
    		hslot2 = &udptable->hash2[slot2];
    		if (hslot->count < hslot2->count)
    			goto begin;
    
    		result = udp6_lib_lookup2(net, saddr, sport,
    					  daddr, hnum, dif,
    					  hslot2, slot2);
    		if (!result) {
    			/* No exact-address bind: try wildcard binds. */
    			hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
    			slot2 = hash2 & udptable->mask;
    			hslot2 = &udptable->hash2[slot2];
    			if (hslot->count < hslot2->count)
    				goto begin;
    
    			result = udp6_lib_lookup2(net, saddr, sport,
    						  &in6addr_any, hnum, dif,
    						  hslot2, slot2);
    		}
    		rcu_read_unlock();
    		return result;
    	}
    begin:
    	result = NULL;
    	badness = -1;
    	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
    		score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif);
    		if (score > badness) {
    			result = sk;
    			badness = score;
    		}
    	}
    	/*
    	 * if the nulls value we got at the end of this lookup is
    	 * not the expected one, we must restart lookup.
    	 * We probably met an item that was moved to another chain.
    	 */
    	if (get_nulls_value(node) != slot)
    		goto begin;
    
    	if (result) {
    		/* A refcount that is already zero cannot be taken. */
    		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
    			result = NULL;
    		/* Re-score under the reference; retry if it got worse. */
    		else if (unlikely(compute_score(result, net, hnum, saddr, sport,
    					daddr, dport, dif) < badness)) {
    			sock_put(result);
    			goto begin;
    		}
    	}
    	rcu_read_unlock();
    	return result;
    }
    
    
    /*
     * Look up the receiving socket for @skb.
     *
     * If skb_steal_sock() yields a socket already attached to the buffer it
     * is returned directly; otherwise the lookup uses the addresses from the
     * IPv6 header, the given ports, the incoming interface and the namespace
     * of the skb's dst device.
     */
    static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
    					  __be16 sport, __be16 dport,
    					  struct udp_table *udptable)
    {
    	struct sock *sk;
    	struct ipv6hdr *iph = ipv6_hdr(skb);
    
    	sk = skb_steal_sock(skb);
    	if (unlikely(sk))
    		return sk;
    
    	return __udp6_lib_lookup(dev_net(skb_dst(skb)->dev), &iph->saddr, sport,
    				 &iph->daddr, dport, inet6_iif(skb),
    				 udptable);
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	This should be easy, if there is something there we
     *	return it, otherwise we block.
     *
     *	Returns the number of payload bytes copied to @msg, or the full
     *	payload length when MSG_TRUNC is set in @flags. On failure returns
     *	the error reported by the datagram or copy helpers. A datagram that
     *	fails checksum verification is discarded: with MSG_DONTWAIT the
     *	call returns -EAGAIN, otherwise it waits for the next datagram.
     */
    int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
    		  struct msghdr *msg, size_t len,
    		  int noblock, int flags, int *addr_len)
    {
    	struct ipv6_pinfo *np = inet6_sk(sk);
    	struct inet_sock *inet = inet_sk(sk);
    	struct sk_buff *skb;
    	unsigned int ulen;
    	int peeked;
    	int err;
    	int is_udplite = IS_UDPLITE(sk);
    	int is_udp4;
    	bool slow;
    
    	if (addr_len)
    		*addr_len = sizeof(struct sockaddr_in6);
    
    	if (flags & MSG_ERRQUEUE)
    		return ipv6_recv_error(sk, msg, len);
    
    	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
    		return ipv6_recv_rxpmtu(sk, msg, len);
    
    try_again:
    	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
    				  &peeked, &err);
    	if (!skb)
    		goto out;
    
    	/* Clamp the copy to the payload; flag truncation to the caller. */
    	ulen = skb->len - sizeof(struct udphdr);
    	if (len > ulen)
    		len = ulen;
    	else if (len < ulen)
    		msg->msg_flags |= MSG_TRUNC;
    
    	/* The datagram may have arrived over IPv4 (v4-mapped delivery). */
    	is_udp4 = (skb->protocol == htons(ETH_P_IP));
    
    	/*
    	 * If checksum is needed at all, try to do it while copying the
    	 * data.  If the data is truncated, or if we only want a partial
    	 * coverage checksum (UDP-Lite), do it before the copy.
    	 */
    	if (len < ulen || UDP_SKB_CB(skb)->partial_cov) {
    		if (udp_lib_checksum_complete(skb))
    			goto csum_copy_err;
    	}
    
    	if (skb_csum_unnecessary(skb))
    		err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
    					      msg->msg_iov, len);
    	else {
    		err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
    		if (err == -EINVAL)
    			goto csum_copy_err;
    	}
    	if (err)
    		goto out_free;
    
    	/* A datagram that was already peeked is not counted again. */
    	if (!peeked) {
    		if (is_udp4)
    			UDP_INC_STATS_USER(sock_net(sk),
    					UDP_MIB_INDATAGRAMS, is_udplite);
    		else
    			UDP6_INC_STATS_USER(sock_net(sk),
    					UDP_MIB_INDATAGRAMS, is_udplite);
    	}
    
    	sock_recv_ts_and_drops(msg, sk, skb);
    
    	/* Copy the address. */
    	if (msg->msg_name) {
    		struct sockaddr_in6 *sin6;
    
    		sin6 = (struct sockaddr_in6 *) msg->msg_name;
    		sin6->sin6_family = AF_INET6;
    		sin6->sin6_port = udp_hdr(skb)->source;
    		sin6->sin6_flowinfo = 0;
    		sin6->sin6_scope_id = 0;
    
    		if (is_udp4)
    			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
    					       &sin6->sin6_addr);
    		else {
    			ipv6_addr_copy(&sin6->sin6_addr,
    				       &ipv6_hdr(skb)->saddr);
    			/* Link-local sources need the interface as scope. */
    			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
    				sin6->sin6_scope_id = IP6CB(skb)->iif;
    		}
    
    	}
    	if (is_udp4) {
    		if (inet->cmsg_flags)
    			ip_cmsg_recv(msg, skb);
    	} else {
    		if (np->rxopt.all)
    			datagram_recv_ctl(sk, msg, skb);
    	}
    
    	err = len;
    	/* With MSG_TRUNC the caller asked for the real datagram length. */
    	if (flags & MSG_TRUNC)
    		err = ulen;
    
    out_free:
    	skb_free_datagram_locked(sk, skb);
    out:
    	return err;
    
    csum_copy_err:
    	slow = lock_sock_fast(sk);
    	if (!skb_kill_datagram(sk, skb, flags)) {
    		if (is_udp4)
    			UDP_INC_STATS_USER(sock_net(sk),
    					UDP_MIB_INERRORS, is_udplite);
    		else
    			UDP6_INC_STATS_USER(sock_net(sk),
    					UDP_MIB_INERRORS, is_udplite);
    	}
    	unlock_sock_fast(sk, slow);
    
    	if (flags & MSG_DONTWAIT)
    		return -EAGAIN;
    	goto try_again;
    }
    
    
    /*
     * Handle an ICMPv6 error that refers to a UDP datagram sent earlier.
     *
     * @skb:    buffer whose data starts at the quoted IPv6 header
     * @offset: offset of the quoted UDP header from skb->data
     * @type, @code, @info: fields of the ICMPv6 message
     *
     * The quoted packet was sent by the local side, so its destination and
     * source are swapped when looking up the owning socket. The error is
     * queued when the socket enabled recverr, and is reported through sk_err
     * unless it is a soft error or the socket is unconnected without recverr.
     */
    void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
    		    u8 type, u8 code, int offset, __be32 info,
    		    struct udp_table *udptable)
    {
    	struct ipv6hdr *ip6h = (struct ipv6hdr *)skb->data;
    	struct udphdr *udph = (struct udphdr *)(skb->data + offset);
    	struct ipv6_pinfo *np;
    	struct sock *sk;
    	int harderr;
    	int err;
    
    	sk = __udp6_lib_lookup(dev_net(skb->dev), &ip6h->daddr, udph->dest,
    			       &ip6h->saddr, udph->source, inet6_iif(skb),
    			       udptable);
    	if (!sk)
    		return;
    
    	np = inet6_sk(sk);
    	harderr = icmpv6_err_convert(type, code, &err);
    
    	/* Without recverr, only hard errors on connected sockets count. */
    	if (!np->recverr &&
    	    (!harderr || sk->sk_state != TCP_ESTABLISHED))
    		goto out;
    
    	if (np->recverr)
    		ipv6_icmp_error(sk, skb, err, udph->dest, ntohl(info),
    				(u8 *)(udph + 1));
    
    	sk->sk_err = err;
    	sk->sk_error_report(sk);
    out:
    	sock_put(sk);
    }
    
    
    /* ICMPv6 error entry point for plain UDP: uses the global udp_table. */
    static __inline__ void udpv6_err(struct sk_buff *skb,
    				 struct inet6_skb_parm *opt, u8 type,
    				 u8 code, int offset, __be32 info)
    {
    	__udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
    }
    
    int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct udp_sock *up = udp_sk(sk);
    
    	int is_udplite = IS_UDPLITE(sk);
    
    	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
    		goto drop;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/*
    	 * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
    	 */
    
    	if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
    
    
    		if (up->pcrlen == 0) {          /* full coverage was set  */
    			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: partial coverage"
    				" %d while full coverage %d requested\n",
    				UDP_SKB_CB(skb)->cscov, skb->len);
    			goto drop;
    		}
    		if (UDP_SKB_CB(skb)->cscov  <  up->pcrlen) {
    			LIMIT_NETDEBUG(KERN_WARNING "UDPLITE6: coverage %d "
    						    "too small, need min %d\n",
    				       UDP_SKB_CB(skb)->cscov, up->pcrlen);
    			goto drop;
    		}
    
    	if (sk->sk_filter) {
    		if (udp_lib_checksum_complete(skb))
    			goto drop;
    	}
    
    	if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
    
    		/* Note that an ENOMEM error is charged twice */
    
    		if (rc == -ENOMEM)
    
    			UDP6_INC_STATS_BH(sock_net(sk),
    					UDP_MIB_RCVBUFERRORS, is_udplite);
    
    		goto drop_no_sk_drops_inc;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return 0;
    
    	atomic_inc(&sk->sk_drops);
    drop_no_sk_drops_inc:
    
    	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
    
    	kfree_skb(skb);
    	return -1;
    
    /*
     * Starting at @sk, return the next socket on the hash chain that should
     * receive a multicast datagram sent from @rmt_addr:@rmt_port to
     * @loc_addr:@loc_port on interface @dif, or NULL when the chain is
     * exhausted.
     *
     * A socket qualifies when it lives in @net, is an IPv6 socket hashed on
     * the local port, does not contradict the packet with a connected remote
     * port/address, bound device or bound local address, and passes
     * inet6_mc_check().
     */
    static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,
    				      __be16 loc_port, struct in6_addr *loc_addr,
    				      __be16 rmt_port, struct in6_addr *rmt_addr,
    				      int dif)
    {
    	struct hlist_nulls_node *node;
    	struct sock *s = sk;
    	unsigned short num = ntohs(loc_port);
    
    	sk_nulls_for_each_from(s, node) {
    		struct inet_sock *inet = inet_sk(s);
    
    		/* Sockets from other network namespaces never match. */
    		if (!net_eq(sock_net(s), net))
    			continue;
    
    		if (udp_sk(s)->udp_port_hash == num &&
    		    s->sk_family == PF_INET6) {
    			struct ipv6_pinfo *np = inet6_sk(s);
    
    			if (inet->inet_dport) {
    				if (inet->inet_dport != rmt_port)
    					continue;
    			}
    			if (!ipv6_addr_any(&np->daddr) &&
    			    !ipv6_addr_equal(&np->daddr, rmt_addr))
    				continue;
    
    			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)
    				continue;
    
    			if (!ipv6_addr_any(&np->rcv_saddr)) {
    				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr))
    					continue;
    			}
    			if (!inet6_mc_check(s, loc_addr, rmt_addr))
    				continue;
    			return s;
    		}
    	}
    	return NULL;
    }
    
    
    static void flush_stack(struct sock **stack, unsigned int count,
    			struct sk_buff *skb, unsigned int final)
    {
    	unsigned int i;
    	struct sock *sk;
    	struct sk_buff *skb1;
    
    	for (i = 0; i < count; i++) {
    		skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
    
    
    			if (sk_rcvqueues_full(sk, skb)) {
    				kfree_skb(skb1);
    				goto drop;
    			}
    
    			bh_lock_sock(sk);
    			if (!sock_owned_by_user(sk))
    				udpv6_queue_rcv_skb(sk, skb1);
    
    Zhu Yi's avatar
    Zhu Yi committed
    			else if (sk_add_backlog(sk, skb1)) {
    
    Zhu Yi's avatar
    Zhu Yi committed
    				kfree_skb(skb1);
    				bh_unlock_sock(sk);
    				goto drop;
    			}
    
    			bh_unlock_sock(sk);
    
    Zhu Yi's avatar
    Zhu Yi committed
    			continue;
    
    Zhu Yi's avatar
    Zhu Yi committed
    drop:
    		atomic_inc(&sk->sk_drops);
    		UDP6_INC_STATS_BH(sock_net(sk),
    				UDP_MIB_RCVBUFERRORS, IS_UDPLITE(sk));
    		UDP6_INC_STATS_BH(sock_net(sk),
    				UDP_MIB_INERRORS, IS_UDPLITE(sk));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Note: called only from the BH handler context,
     * so we don't need to lock the hashes.
     */
    
    static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
    		struct in6_addr *saddr, struct in6_addr *daddr,
    
    		struct udp_table *udptable)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct sock *sk, *stack[256 / sizeof(struct sock *)];
    
    	const struct udphdr *uh = udp_hdr(skb);
    
    	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	int dif;
    
    	unsigned int i, count = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	spin_lock(&hslot->lock);
    
    	sk = sk_nulls_head(&hslot->head);
    
    	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
    
    	while (sk) {
    		stack[count++] = sk;
    		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,
    				       uh->source, saddr, dif);
    		if (unlikely(count == ARRAY_SIZE(stack))) {
    			if (!sk)
    				break;
    			flush_stack(stack, count, skb, ~0);
    			count = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	/*
    	 * before releasing the lock, we must take reference on sockets
    	 */
    	for (i = 0; i < count; i++)
    		sock_hold(stack[i]);
    
    
    	spin_unlock(&hslot->lock);
    
    
    	if (count) {
    		flush_stack(stack, count, skb, count - 1);
    
    		for (i = 0; i < count; i++)
    			sock_put(stack[i]);
    	} else {
    		kfree_skb(skb);
    	}
    
    static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
    				 int proto)
    
    	int err;
    
    	UDP_SKB_CB(skb)->partial_cov = 0;
    	UDP_SKB_CB(skb)->cscov = skb->len;
    
    
    	if (proto == IPPROTO_UDPLITE) {
    
    		err = udplite_checksum_init(skb, uh);
    		if (err)
    			return err;
    	}
    
    
    	if (uh->check == 0) {
    		/* RFC 2460 section 8.1 says that we SHOULD log
    		   this error. Well, it is reasonable.
    		 */
    		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n");
    		return 1;
    	}
    	if (skb->ip_summed == CHECKSUM_COMPLETE &&
    
    	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
    
    			     skb->len, proto, skb->csum))
    
    		skb->ip_summed = CHECKSUM_UNNECESSARY;
    
    
    	if (!skb_csum_unnecessary(skb))
    
    		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
    							 &ipv6_hdr(skb)->daddr,
    
    							 skb->len, proto, 0));
    
    int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct net *net = dev_net(skb->dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct sock *sk;
    
    	struct udphdr *uh;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct in6_addr *saddr, *daddr;
    	u32 ulen = 0;
    
    	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	saddr = &ipv6_hdr(skb)->saddr;
    	daddr = &ipv6_hdr(skb)->daddr;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	ulen = ntohs(uh->len);
    
    	if (ulen > skb->len)
    		goto short_packet;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (proto == IPPROTO_UDP) {
    		/* UDP validates ulen. */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		/* Check for jumbo payload */
    		if (ulen == 0)
    			ulen = skb->len;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		if (ulen < sizeof(*uh))
    			goto short_packet;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		if (ulen < skb->len) {
    			if (pskb_trim_rcsum(skb, ulen))
    				goto short_packet;
    
    			saddr = &ipv6_hdr(skb)->saddr;
    			daddr = &ipv6_hdr(skb)->daddr;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (udp6_csum_init(skb, uh, proto))
    		goto discard;
    
    
    	/*
    	 *	Multicast receive code
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 */
    
    	if (ipv6_addr_is_multicast(daddr))
    
    		return __udp6_lib_mcast_deliver(net, skb,
    				saddr, daddr, udptable);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* Unicast */
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 * check socket cache ... must talk to Alan about his plans
    	 * for sock caches... i'll skip this for now.
    	 */
    
    	sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (sk == NULL) {
    		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
    			goto discard;
    
    
    		if (udp_lib_checksum_complete(skb))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			goto discard;
    
    		UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS,
    				proto == IPPROTO_UDPLITE);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    		kfree_skb(skb);
    
    		return 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* deliver */
    
    	if (sk_rcvqueues_full(sk, skb)) {
    		sock_put(sk);
    		goto discard;
    	}
    
    	bh_lock_sock(sk);
    
    	if (!sock_owned_by_user(sk))
    		udpv6_queue_rcv_skb(sk, skb);
    
    Zhu Yi's avatar
    Zhu Yi committed
    	else if (sk_add_backlog(sk, skb)) {
    
    Zhu Yi's avatar
    Zhu Yi committed
    		atomic_inc(&sk->sk_drops);
    		bh_unlock_sock(sk);
    		sock_put(sk);
    		goto discard;
    	}
    
    	bh_unlock_sock(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	sock_put(sk);
    
    	return 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    short_packet:
    
    	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
    
    		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
    
    		       saddr,
    		       ntohs(uh->source),
    		       ulen,
    		       skb->len,
    		       daddr,
    		       ntohs(uh->dest));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    discard:
    
    	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	kfree_skb(skb);
    
    	return 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    static __inline__ int udpv6_rcv(struct sk_buff *skb)
    
    	return __udp6_lib_rcv(skb, &udp_table, IPPROTO_UDP);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Throw away all pending data and cancel the corking. Socket is locked.
     *
     * Pending IPv4 data (up->pending == AF_INET) is handed to the IPv4 flush
     * routine; any other pending data resets the cork state and is flushed
     * through the IPv6 path. Nothing happens when no data is pending.
     */
    static void udp_v6_flush_pending_frames(struct sock *sk)
    {
    	struct udp_sock *up = udp_sk(sk);
    
    	if (up->pending == AF_INET)
    		udp_flush_pending_frames(sk);
    	else if (up->pending) {
    		up->len = 0;
    		up->pending = 0;
    		ip6_flush_pending_frames(sk);
    	}
    }
    
    
    /**
     * 	udp6_hwcsum_outgoing  -  handle outgoing HW checksumming
     * 	@sk: 	socket we are sending on
     * 	@skb: 	sk_buff containing the filled-in UDP header
     * 	        (checksum field must be zeroed out)
     * 	@saddr:	source address used for the pseudo-header
     * 	@daddr:	destination address used for the pseudo-header
     * 	@len:	length passed to the pseudo-header checksum
     *
     * 	With a single buffer queued, only the pseudo-header sum is stored
     * 	and the checksum start/offset are recorded in the skb. With several
     * 	buffers queued the checksum is computed fully in software.
     */
    static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
    				 const struct in6_addr *saddr,
    				 const struct in6_addr *daddr, int len)
    {
    	struct udphdr *uh = udp_hdr(skb);
    	struct sk_buff *frag;
    	unsigned int hdr_off;
    	__wsum total = 0;
    
    	if (skb_queue_len(&sk->sk_write_queue) == 1) {
    		/* Only one fragment on the socket.  */
    		skb->csum_start = skb_transport_header(skb) - skb->head;
    		skb->csum_offset = offsetof(struct udphdr, check);
    		uh->check = ~csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, 0);
    		return;
    	}
    
    	/*
    	 * HW-checksum won't work as there are two or more
    	 * fragments on the socket so that all csums of sk_buffs
    	 * should be together
    	 */
    	hdr_off = skb_transport_offset(skb);
    	skb->csum = skb_checksum(skb, hdr_off, skb->len - hdr_off, 0);
    	skb->ip_summed = CHECKSUM_NONE;
    
    	/* Fold the per-buffer sums of everything queued on the socket. */
    	skb_queue_walk(&sk->sk_write_queue, frag) {
    		total = csum_add(total, frag->csum);
    	}
    
    	uh->check = csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, total);
    	if (uh->check == 0)
    		uh->check = CSUM_MANGLED_0;
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	Sending
     */
    
    
    static int udp_v6_push_pending_frames(struct sock *sk)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct sk_buff *skb;
    	struct udphdr *uh;
    
    	struct udp_sock  *up = udp_sk(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct inet_sock *inet = inet_sk(sk);
    	struct flowi *fl = &inet->cork.fl;
    	int err = 0;
    
    	int is_udplite = IS_UDPLITE(sk);
    
    	__wsum csum = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* Grab the skbuff where UDP header space exists. */
    	if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
    		goto out;
    
    	/*
    	 * Create a UDP header
    	 */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	uh->source = fl->fl_ip_sport;
    	uh->dest = fl->fl_ip_dport;
    	uh->len = htons(up->len);
    	uh->check = 0;
    
    
    	if (is_udplite)
    
    		csum = udplite_csum_outgoing(sk, skb);
    
    	else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
    		udp6_hwcsum_outgoing(sk, skb, &fl->fl6_src, &fl->fl6_dst,
    				     up->len);
    		goto send;
    	} else
    
    		csum = udp_csum_outgoing(sk, skb);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/* add protocol-dependent pseudo-header */
    	uh->check = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
    				    up->len, fl->proto, csum   );
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (uh->check == 0)
    
    		uh->check = CSUM_MANGLED_0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	err = ip6_push_pending_frames(sk);
    
    	if (err) {
    		if (err == -ENOBUFS && !inet6_sk(sk)->recverr) {
    			UDP6_INC_STATS_USER(sock_net(sk),
    					    UDP_MIB_SNDBUFERRORS, is_udplite);
    			err = 0;
    		}
    	} else
    		UDP6_INC_STATS_USER(sock_net(sk),
    				    UDP_MIB_OUTDATAGRAMS, is_udplite);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    out:
    	up->len = 0;
    	up->pending = 0;
    	return err;
    }
    
    
    int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		  struct msghdr *msg, size_t len)
    {
    	struct ipv6_txoptions opt_space;
    	struct udp_sock *up = udp_sk(sk);
    	struct inet_sock *inet = inet_sk(sk);
    	struct ipv6_pinfo *np = inet6_sk(sk);
    	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
    	struct in6_addr *daddr, *final_p = NULL, final;
    	struct ipv6_txoptions *opt = NULL;
    	struct ip6_flowlabel *flowlabel = NULL;
    
    Herbert Xu's avatar
    Herbert Xu committed
    	struct flowi fl;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct dst_entry *dst;
    	int addr_len = msg->msg_namelen;
    	int ulen = len;
    	int hlimit = -1;
    
    	int dontfrag = -1;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
    	int err;
    
    	int is_udplite = IS_UDPLITE(sk);
    
    	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* destination address check */
    	if (sin6) {
    		if (addr_len < offsetof(struct sockaddr, sa_data))
    			return -EINVAL;
    
    		switch (sin6->sin6_family) {
    		case AF_INET6:
    			if (addr_len < SIN6_LEN_RFC2133)
    				return -EINVAL;
    			daddr = &sin6->sin6_addr;
    			break;
    		case AF_INET:
    			goto do_udp_sendmsg;
    		case AF_UNSPEC:
    			msg->msg_name = sin6 = NULL;
    			msg->msg_namelen = addr_len = 0;
    			daddr = NULL;
    			break;
    		default:
    			return -EINVAL;
    		}
    	} else if (!up->pending) {
    		if (sk->sk_state != TCP_ESTABLISHED)
    			return -EDESTADDRREQ;
    		daddr = &np->daddr;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		daddr = NULL;
    
    	if (daddr) {
    
    		if (ipv6_addr_v4mapped(daddr)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			struct sockaddr_in sin;
    			sin.sin_family = AF_INET;
    
    			sin.sin_port = sin6 ? sin6->sin6_port : inet->inet_dport;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			sin.sin_addr.s_addr = daddr->s6_addr32[3];
    			msg->msg_name = &sin;
    			msg->msg_namelen = sizeof(sin);
    do_udp_sendmsg:
    			if (__ipv6_only_sock(sk))
    				return -ENETUNREACH;
    			return udp_sendmsg(iocb, sk, msg, len);
    		}
    	}
    
    	if (up->pending == AF_INET)
    		return udp_sendmsg(iocb, sk, msg, len);
    
    	/* Rough check on arithmetic overflow,
    
    	   better check is made in ip6_append_data().
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	   */
    	if (len > INT_MAX - sizeof(struct udphdr))
    		return -EMSGSIZE;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (up->pending) {
    		/*
    		 * There are pending frames.