ip6_output.c
	 *
	 * FIXME: The packet may need to be fragmented into multiple
	 *        chunks at once if the non-fragmentable extension
	 *        headers are too large.
	 * --yoshfuji
	 */
    
    	inet->cork.length += length;
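
	/*
	 * Large sends on a UDP socket: if the payload exceeds the mtu and
	 * the device supports UDP fragmentation offload (NETIF_F_UFO),
	 * hand the whole buffer to ip6_ufo_append_data() and let the
	 * device do the segmentation.
	 */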
    
	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO)) {

		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
					  fragheaderlen, transhdrlen, mtu,
					  flags);
		if (err)
			goto error;
		return 0;
	}
    
    	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
    		goto alloc_new_skb;
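
	/*
	 * Main copy loop: keep filling the skb at the tail of the write
	 * queue; when it is full (copy <= 0) fall into alloc_new_skb and
	 * start another fragment-sized skb.
	 */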
    
    	while (length > 0) {
		/* Check if the remaining data fits into the current packet. */
    		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
    		if (copy < length)
    			copy = maxfraglen - skb->len;
    
    		if (copy <= 0) {
    			char *data;
    			unsigned int datalen;
    			unsigned int fraglen;
    			unsigned int fraggap;
    			unsigned int alloclen;
    			struct sk_buff *skb_prev;
    alloc_new_skb:
    			skb_prev = skb;
    
    			/* There's no room in the current skb */
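			/* fraggap: bytes in skb_prev beyond maxfraglen,
			 * which belong at the front of the new skb. */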
    			if (skb_prev)
    				fraggap = skb_prev->len - maxfraglen;
    			else
    				fraggap = 0;
    
    			/*
    			 * If remaining data exceeds the mtu,
    			 * we know we need more fragment(s).
    			 */
    			datalen = length + fraggap;
    			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
    				datalen = maxfraglen - fragheaderlen;
    
    			fraglen = datalen + fragheaderlen;
    			if ((flags & MSG_MORE) &&
    			    !(rt->u.dst.dev->features&NETIF_F_SG))
    				alloclen = mtu;
    			else
    				alloclen = datalen + fragheaderlen;
    
			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MORE
			 * because we have no idea whether this is the last one.
			 */
    			if (datalen == length + fraggap)
    				alloclen += rt->u.dst.trailer_len;
    
			/*
			 * We just reserve space for the fragment header.
			 * Note: this may be an overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
    			alloclen += sizeof(struct frag_hdr);
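
			/*
			 * transhdrlen is non-zero only for the first skb of
			 * the message, so block for memory there (subject to
			 * MSG_DONTWAIT); later skbs are allocated without
			 * blocking and may grow the queue to roughly twice
			 * sk_sndbuf before failing with ENOBUFS.
			 */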
    
    			if (transhdrlen) {
    				skb = sock_alloc_send_skb(sk,
    						alloclen + hh_len,
    						(flags & MSG_DONTWAIT), &err);
    			} else {
    				skb = NULL;
    				if (atomic_read(&sk->sk_wmem_alloc) <=
    				    2 * sk->sk_sndbuf)
    					skb = sock_wmalloc(sk,
    							   alloclen + hh_len, 1,
    							   sk->sk_allocation);
    				if (unlikely(skb == NULL))
    					err = -ENOBUFS;
    			}
    			if (skb == NULL)
    				goto error;
    			/*
    			 *	Fill in the control structures
    			 */
    			skb->ip_summed = csummode;
    			skb->csum = 0;
    			/* reserve for fragmentation */
    			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
    
    			/*
    			 *	Find where to start putting bytes
    			 */
    			data = skb_put(skb, fraglen);
    			skb->nh.raw = data + exthdrlen;
    			data += fragheaderlen;
    			skb->h.raw = data + exthdrlen;
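
			/*
			 * Move the fraggap bytes (and their checksum
			 * contribution) from the tail of skb_prev into this
			 * skb, then trim skb_prev back to maxfraglen.
			 */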
    
    			if (fraggap) {
    				skb->csum = skb_copy_and_csum_bits(
    					skb_prev, maxfraglen,
    					data + transhdrlen, fraggap, 0);
    				skb_prev->csum = csum_sub(skb_prev->csum,
    							  skb->csum);
    				data += fraggap;
    
    				pskb_trim_unique(skb_prev, maxfraglen);
    
    			}
    			copy = datalen - transhdrlen - fraggap;
    			if (copy < 0) {
    				err = -EINVAL;
    				kfree_skb(skb);
    				goto error;
    			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
    				err = -EFAULT;
    				kfree_skb(skb);
    				goto error;
    			}
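
			/*
			 * Transport header, extension headers and any
			 * hardware checksum offload apply to the first skb
			 * only, so clear them for the skbs that follow.
			 */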
    
    			offset += copy;
    			length -= datalen - fraggap;
    			transhdrlen = 0;
    			exthdrlen = 0;
    			csummode = CHECKSUM_NONE;
    
    			/*
    			 * Put the packet on the pending queue
    			 */
    			__skb_queue_tail(&sk->sk_write_queue, skb);
    			continue;
    		}
    
    		if (copy > length)
    			copy = length;
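
		/*
		 * Append to the tail skb: copy into its linear area when the
		 * device cannot do scatter-gather, otherwise copy into page
		 * fragments hanging off the skb.
		 */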
    
    		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
    			unsigned int off;
    
    			off = skb->len;
    			if (getfrag(from, skb_put(skb, copy),
    						offset, copy, off, skb) < 0) {
    				__skb_trim(skb, off);
    				err = -EFAULT;
    				goto error;
    			}
    		} else {
    			int i = skb_shinfo(skb)->nr_frags;
    			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
    			struct page *page = sk->sk_sndmsg_page;
    			int off = sk->sk_sndmsg_off;
    			unsigned int left;
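
			/*
			 * Reuse the socket's cached send page while it has
			 * room; otherwise allocate a fresh page and start a
			 * new fragment descriptor, bounded by MAX_SKB_FRAGS.
			 */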
    
    			if (page && (left = PAGE_SIZE - off) > 0) {
    				if (copy >= left)
    					copy = left;
    				if (page != frag->page) {
    					if (i == MAX_SKB_FRAGS) {
    						err = -EMSGSIZE;
    						goto error;
    					}
    					get_page(page);
    					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
    					frag = &skb_shinfo(skb)->frags[i];
    				}
			} else if (i < MAX_SKB_FRAGS) {
    				if (copy > PAGE_SIZE)
    					copy = PAGE_SIZE;
    				page = alloc_pages(sk->sk_allocation, 0);
    				if (page == NULL) {
    					err = -ENOMEM;
    					goto error;
    				}
    				sk->sk_sndmsg_page = page;
    				sk->sk_sndmsg_off = 0;
    
    				skb_fill_page_desc(skb, i, page, 0, 0);
    				frag = &skb_shinfo(skb)->frags[i];
    				skb->truesize += PAGE_SIZE;
    				atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
    			} else {
    				err = -EMSGSIZE;
    				goto error;
    			}
    			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
    				err = -EFAULT;
    				goto error;
    			}
    			sk->sk_sndmsg_off += copy;
    			frag->size += copy;
    			skb->len += copy;
    			skb->data_len += copy;
    		}
    		offset += copy;
    		length -= copy;
    	}
    	return 0;
    error:
    	inet->cork.length -= length;
    	IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
    	return err;
    }
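
/*
 * ip6_push_pending_frames - send out what ip6_append_data() queued: splice
 * the queued skbs into one packet via frag_list, prepend extension headers
 * and the IPv6 header, and pass the result to dst_output() through the
 * NF_IP6_LOCAL_OUT netfilter hook.
 */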
    
    int ip6_push_pending_frames(struct sock *sk)
    {
    	struct sk_buff *skb, *tmp_skb;
    	struct sk_buff **tail_skb;
    	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
    	struct inet_sock *inet = inet_sk(sk);
    	struct ipv6_pinfo *np = inet6_sk(sk);
    	struct ipv6hdr *hdr;
    	struct ipv6_txoptions *opt = np->cork.opt;
    	struct rt6_info *rt = np->cork.rt;
    	struct flowi *fl = &inet->cork.fl;
    	unsigned char proto = fl->proto;
    	int err = 0;
    
    	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
    		goto out;
    	tail_skb = &(skb_shinfo(skb)->frag_list);
    
    	/* move skb->data to ip header from ext header */
    	if (skb->data < skb->nh.raw)
    		__skb_pull(skb, skb->nh.raw - skb->data);
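
	/*
	 * Strip the room reserved for per-fragment headers from each
	 * remaining skb and chain it onto the head skb's frag_list, folding
	 * its length, truesize and socket accounting into the head.
	 */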
    	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
    		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
    		*tail_skb = tmp_skb;
    		tail_skb = &(tmp_skb->next);
    		skb->len += tmp_skb->len;
    		skb->data_len += tmp_skb->len;
    		skb->truesize += tmp_skb->truesize;
    		__sock_put(tmp_skb->sk);
    		tmp_skb->destructor = NULL;
    		tmp_skb->sk = NULL;
    	}
    
    	ipv6_addr_copy(final_dst, &fl->fl6_dst);
    	__skb_pull(skb, skb->h.raw - skb->nh.raw);
    	if (opt && opt->opt_flen)
    		ipv6_push_frag_opts(skb, opt, &proto);
    	if (opt && opt->opt_nflen)
    		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
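
	/*
	 * Build the IPv6 header: version, traffic class and flow label in
	 * the first word, then payload length (left 0 for jumbograms above
	 * IPV6_MAXPLEN), hop limit, next header and addresses from the flow.
	 */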
    
	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));

    	*(u32*)hdr = fl->fl6_flowlabel |
    		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
    
    	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
    		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
    	else
    		hdr->payload_len = 0;
    	hdr->hop_limit = np->cork.hop_limit;
    	hdr->nexthdr = proto;
    	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;

    	skb->dst = dst_clone(&rt->u.dst);
	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
    	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
    	if (err) {
		if (err > 0)
			err = np->recverr ? net_xmit_errno(err) : 0;
		if (err)
    			goto error;
    	}
    
    out:
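	/* Release the cork state so the socket can start a new message. */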
    	inet->cork.flags &= ~IPCORK_OPT;
    
    	kfree(np->cork.opt);
    	np->cork.opt = NULL;
    
    	if (np->cork.rt) {
    		dst_release(&np->cork.rt->u.dst);
    		np->cork.rt = NULL;
    		inet->cork.flags &= ~IPCORK_ALLFRAG;
    	}
    	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
    	return err;
    error:
    	goto out;
    }
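
/*
 * ip6_flush_pending_frames - drop anything still queued by ip6_append_data()
 * (counting each skb as an output discard) and release the cork state.
 */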
    
    void ip6_flush_pending_frames(struct sock *sk)
    {
    	struct inet_sock *inet = inet_sk(sk);
    	struct ipv6_pinfo *np = inet6_sk(sk);
    	struct sk_buff *skb;
    
    	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
    		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
    		kfree_skb(skb);
    	}
    
	inet->cork.flags &= ~IPCORK_OPT;

    	kfree(np->cork.opt);
    	np->cork.opt = NULL;
    
    	if (np->cork.rt) {
    		dst_release(&np->cork.rt->u.dst);
    		np->cork.rt = NULL;
    		inet->cork.flags &= ~IPCORK_ALLFRAG;
    	}
    	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
    }