    						     (tcp_skb_is_last(sk, skb) ?
    						      nonagle : TCP_NAGLE_PUSH))))
    				break;
    		} else {
    
    			if (!push_one && tcp_tso_should_defer(sk, skb))
    				break;
    		}
    
    		/* TSQ : sk_wmem_alloc accounts skb truesize,
    		 * including skb overhead. But that's OK.
    		 */
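    		/* Note: once TSQ_THROTTLED is set below, this loop stops
    		 * queueing new segments; sending typically resumes from the
    		 * TSQ handler once previously transmitted skbs are freed.
    		 */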
    		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
    			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
    			break;
    		}
    
    		limit = mss_now;
    		if (tso_segs > 1 && !tcp_urg_mode(tp))
    			limit = tcp_mss_split_point(sk, skb, mss_now,
    						    min_t(unsigned int,
    							  cwnd_quota,
    							  sk->sk_gso_max_segs));
    
    		if (skb->len > limit &&
    		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
    			break;
    
    		TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
    			break;
    
    
    		/* Advance the send_head.  This one is sent out.
    		 * This call will increment packets_out.
    		 */
    
    		tcp_event_new_data_sent(sk, skb);
    
    		tcp_minshall_update(tp, mss_now, skb);
    
    		sent_pkts += tcp_skb_pcount(skb);
    
    		if (push_one)
    			break;
    	}
    
    	if (likely(sent_pkts)) {
    		if (tcp_in_cwnd_reduction(sk))
    			tp->prr_out += sent_pkts;
    		return false;
    	}
    
    	return !tp->packets_out && tcp_send_head(sk);
    }
    
    /* Push out any pending frames which were held back due to
     * TCP_CORK or attempt at coalescing tiny packets.
     * The socket must be locked by the caller.
     */
    
    void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
    			       int nonagle)
    {
    
    	/* If we are closed, the bytes will have to remain here.
    	 * In time closedown will finish, we empty the write queue and
    	 * all will be happy.
    	 */
    	if (unlikely(sk->sk_state == TCP_CLOSE))
    		return;
    
    
    	if (tcp_write_xmit(sk, cur_mss, nonagle, 0,
    			   sk_gfp_atomic(sk, GFP_ATOMIC)))
    
    		tcp_check_probe_timer(sk);
    }
    
    /* Send _single_ skb sitting at the send head. This function requires
     * true push pending frames to setup probe timer etc.
     */
    void tcp_push_one(struct sock *sk, unsigned int mss_now)
    {
    
    	struct sk_buff *skb = tcp_send_head(sk);
    
    
    	BUG_ON(!skb || skb->len < mss_now);
    
    
    	tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
    }
    
    /* This function returns the amount that we can raise the
     * usable window based on the following constraints
    
     * 1. The window can never be shrunk once it is offered (RFC 793)
     * 2. We limit memory per socket
     *
     * RFC 1122:
     * "the suggested [SWS] avoidance algorithm for the receiver is to keep
     *  RECV.NEXT + RCV.WIN fixed until:
     *  RCV.BUFF - RCV.USER - RCV.WINDOW >= min(1/2 RCV.BUFF, MSS)"
     *
     * i.e. don't raise the right edge of the window until you can raise
     * it at least MSS bytes.
     *
     * Unfortunately, the recommended algorithm breaks header prediction,
     * since header prediction assumes th->window stays fixed.
     *
     * Strictly speaking, keeping th->window fixed violates the receiver
     * side SWS prevention criteria. The problem is that under this rule
     * a stream of single byte packets will cause the right side of the
     * window to always advance by a single byte.
    
     * Of course, if the sender implements sender side SWS prevention
     * then this will not be a problem.
    
     * BSD seems to make the following compromise:
    
     *	If the free space is less than the 1/4 of the maximum
     *	space available and the free space is less than 1/2 mss,
     *	then set the window to 0.
     *	[ Actually, bsd uses MSS and 1/4 of maximal _window_ ]
     *	Otherwise, just prevent the window from shrinking
     *	and from being larger than the largest representable value.
     *
     * This prevents incremental opening of the window in the regime
     * where TCP is limited by the speed of the reader side taking
     * data out of the TCP receive queue. It does nothing about
     * those cases where the window is constrained on the sender side
     * because the pipeline is full.
     *
     * BSD also seems to "accidentally" limit itself to windows that are a
     * multiple of MSS, at least until the free space gets quite small.
     * This would appear to be a side effect of the mbuf implementation.
     * Combining these two algorithms results in the observed behavior
     * of having a fixed window size at almost all times.
     *
     * Below we obtain similar behavior by forcing the offered window to
     * a multiple of the mss when it is feasible to do so.
     *
     * Note, we don't "adjust" for TIMESTAMP or SACK option bytes.
     * Regular options like TIMESTAMP are taken into account.
     */
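    /* Illustrative example: with mss = 1460 and 10000 bytes of free space,
     * the non-scaled path below offers (10000 / 1460) * 1460 = 8760 bytes,
     * i.e. six full segments, instead of creeping up byte by byte.
     */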
    u32 __tcp_select_window(struct sock *sk)
    {
    
    	struct inet_connection_sock *icsk = inet_csk(sk);
    
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	/* MSS for the peer's data.  Previous versions used mss_clamp
    
    	 * here.  I don't know if the value based on our guesses
    	 * of peer's MSS is better for the performance.  It's more correct
    	 * but may be worse for the performance because of rcv_mss
    	 * fluctuations.  --SAW  1998/11/1
    	 */
    
    	int mss = icsk->icsk_ack.rcv_mss;
    
    	int free_space = tcp_space(sk);
    	int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk));
    	int window;
    
    	if (mss > full_space)
    
    		mss = full_space;
    
    
    
    	if (free_space < (full_space >> 1)) {
    
    		icsk->icsk_ack.quick = 0;
    
    
    
    		if (sk_under_memory_pressure(sk))
    
    			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
    					       4U * tp->advmss);
    
    
    		if (free_space < mss)
    			return 0;
    	}
    
    	if (free_space > tp->rcv_ssthresh)
    		free_space = tp->rcv_ssthresh;
    
    	/* Don't do rounding if we are using window scaling, since the
    	 * scaled window will not line up with the MSS boundary anyway.
    	 */
    	window = tp->rcv_wnd;
    	if (tp->rx_opt.rcv_wscale) {
    		window = free_space;
    
    		/* Advertise enough space so that it won't get scaled away.
    		 * Important case: prevent zero window announcement if
    		 * 1<<rcv_wscale > mss.
    		 */
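    		/* Illustrative example: with rcv_wscale = 7 the window is
    		 * advertised in 128-byte units, so 100 bytes of free space
    		 * would scale down to zero; rounding up to the next multiple
    		 * of 1<<rcv_wscale (here 128) avoids that.
    		 */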
    		if (((window >> tp->rx_opt.rcv_wscale) << tp->rx_opt.rcv_wscale) != window)
    			window = (((window >> tp->rx_opt.rcv_wscale) + 1)
    				  << tp->rx_opt.rcv_wscale);
    	} else {
    		/* Get the largest window that is a nice multiple of mss.
    		 * Window clamp already applied above.
    		 * If our current window offering is within 1 mss of the
    		 * free space we just keep it. This prevents the divide
    		 * and multiply from happening most of the time.
    		 * We also don't do any window rounding when the free space
    		 * is too small.
    		 */
    		if (window <= free_space - mss || window > free_space)
    
    			window = (free_space / mss) * mss;
    
    		else if (mss == full_space &&
    			 free_space > window + (full_space >> 1))
    			window = free_space;
    	}
    
    	return window;
    }
    
    
    /* Collapses two adjacent SKB's during retransmission. */
    static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
    
    	int skb_size, next_skb_size;
    
    
    
    	skb_size = skb->len;
    	next_skb_size = next_skb->len;
    
    
    
    	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
    
    	tcp_highest_sack_combine(sk, next_skb, skb);
    
    
    
    	tcp_unlink_write_queue(next_skb, sk);
    
    
    
    	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
    				  next_skb_size);
    
    
    
    	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
    		skb->ip_summed = CHECKSUM_PARTIAL;
    
    
    
    	if (skb->ip_summed != CHECKSUM_PARTIAL)
    		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
    
    
    
    	/* Update sequence range on original skb. */
    	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
    
    
    
    	/* Merge over control information. This moves PSH/FIN etc. over */
    
    	TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags;
    
    
    	/* All done, get rid of second SKB and account for it so
    	 * packet counting does not break.
    	 */
    	TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
    
    	/* changed transmit queue under us so clear hints */
    
    	tcp_clear_retrans_hints_partial(tp);
    	if (next_skb == tp->retransmit_skb_hint)
    		tp->retransmit_skb_hint = skb;
    
    	tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
    
    
    	sk_wmem_free_skb(sk, next_skb);
    }
    
    /* Check if coalescing SKBs is legal. */
    
    static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
    
    {
    	if (tcp_skb_pcount(skb) > 1)
    		return false;
    
    	/* TODO: SACK collapsing could be used to remove this condition */
    	if (skb_shinfo(skb)->nr_frags != 0)
    		return false;
    
    	if (skb_cloned(skb))
    		return false;
    
    	if (skb == tcp_send_head(sk))
    		return false;
    
    	/* Some heuristics for collapsing over SACK'd could be invented */
    	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
    		return false;
    
    	return true;
    }
    
    /* Collapse packets in the retransmit queue to create fewer
     * packets on the wire. This is only done on retransmission.
     */
    
    static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
    				     int space)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct sk_buff *skb = to, *tmp;
    
    	bool first = true;
    
    
    	if (!sysctl_tcp_retrans_collapse)
    		return;
    
    	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
    
    		return;
    
    	tcp_for_write_queue_from_safe(skb, tmp, sk) {
    		if (!tcp_can_collapse(sk, skb))
    			break;
    
    		space -= skb->len;
    
    		if (first) {
    			first = false;
    
    			continue;
    		}
    
    		if (space < 0)
    			break;
    		/* Punt if not enough space exists in the first SKB for
    		 * the data in the second
    		 */
    
    		if (skb->len > skb_availroom(to))
    
    			break;
    
    		if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
    			break;
    
    		tcp_collapse_retrans(sk, to);
    	}
    }
    
    
    /* This retransmits one SKB.  Policy decisions and retransmit queue
     * state updates are done by the caller.  Returns non-zero if an
     * error occurred which prevented the send.
     */
    
    int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	struct inet_connection_sock *icsk = inet_csk(sk);
    	unsigned int cur_mss;
    
    	/* Inconclusive MTU probe */
    	if (icsk->icsk_mtup.probe_size) {
    		icsk->icsk_mtup.probe_size = 0;
    	}
    
    
    	/* Do not send more than we queued. 1/4 is reserved for possible
    	 * copying overhead: fragmentation, tunneling, mangling etc.
    	 */
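    	/* For example, with 64KB queued the check below tolerates up to
    	 * min(80KB, sk_sndbuf) of allocated write memory before refusing
    	 * the retransmit with -EAGAIN (illustrative numbers).
    	 */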
    	if (atomic_read(&sk->sk_wmem_alloc) >
    	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
    		return -EAGAIN;
    
    	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
    		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
    			BUG();
    		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
    			return -ENOMEM;
    	}
    
    
    	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
    		return -EHOSTUNREACH; /* Routing failure or similar. */
    
    
    	cur_mss = tcp_current_mss(sk);
    
    	/* If receiver has shrunk his window, and skb is out of
    	 * new window, do not retransmit it. The exception is the
    	 * case, when window is shrunk to zero. In this case
    	 * our retransmit serves as a zero window probe.
    	 */
    
    	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
    	    TCP_SKB_CB(skb)->seq != tp->snd_una)
    		return -EAGAIN;
    
    	if (skb->len > cur_mss) {
    
    		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
    			return -ENOMEM; /* We'll try again later. */
    	} else {
    		int oldpcount = tcp_skb_pcount(skb);
    
    		if (unlikely(oldpcount > 1)) {
    			tcp_init_tso_segs(sk, skb, cur_mss);
    			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
    		}
    	}
    
    	tcp_retrans_try_collapse(sk, skb, cur_mss);
    
    
    	/* Some Solaris stacks overoptimize and ignore the FIN on a
    	 * retransmit when old data is attached.  So strip it off
    	 * since it is cheap to do so and saves bytes on the network.
    	 */
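    	/* Note: pskb_trim(skb, 0) below drops the payload; the skb is then
    	 * rebuilt as a data-less segment carrying only the final (FIN)
    	 * sequence number, with the original flags preserved.
    	 */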
    
    	if (skb->len > 0 &&
    
    	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
    
    	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
    
    		if (!pskb_trim(skb, 0)) {
    
    			/* Reuse, even though it does some unnecessary work */
    			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
    
    					     TCP_SKB_CB(skb)->tcp_flags);
    
    			skb->ip_summed = CHECKSUM_NONE;
    		}
    	}
    
    	/* Make a copy, if the first transmission SKB clone we made
    	 * is still in somebody's hands, else make a clone.
    	 */
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    
    	/* make sure skb->data is aligned on arches that require it */
    	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
    		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
    						   GFP_ATOMIC);
    
    		return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
    			      -ENOBUFS;
    
    	} else {
    
    		return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
    	}
    }
    
    int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	int err = __tcp_retransmit_skb(sk, skb);
    
    
    	if (err == 0) {
    		/* Update global TCP statistics. */
    
    		TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
    
    
    		tp->total_retrans++;
    
    #if FASTRETRANS_DEBUG > 0
    
    		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
    
    			net_dbg_ratelimited("retrans_out leaked\n");
    
    		}
    #endif
    
    		if (!tp->retrans_out)
    			tp->lost_retrans_low = tp->snd_nxt;
    
    		TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
    		tp->retrans_out += tcp_skb_pcount(skb);
    
    		/* Save stamp of the first retransmit. */
    		if (!tp->retrans_stamp)
    			tp->retrans_stamp = TCP_SKB_CB(skb)->when;
    
    
    		tp->undo_retrans += tcp_skb_pcount(skb);
    
    
    		/* snd_nxt is stored to detect loss of retransmitted segment,
    		 * see tcp_input.c tcp_sacktag_write_queue().
    		 */
    		TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
    	}
    	return err;
    }
    
    
    /* Check if forward retransmits are possible in the current
     * window/congestion state.
     */
    
    static bool tcp_can_forward_retransmit(struct sock *sk)
    
    {
    	const struct inet_connection_sock *icsk = inet_csk(sk);
    
    	const struct tcp_sock *tp = tcp_sk(sk);
    
    
    	/* Forward retransmissions are possible only during Recovery. */
    	if (icsk->icsk_ca_state != TCP_CA_Recovery)
    		return false;
    
    
    	/* No forward retransmissions in Reno are possible. */
    	if (tcp_is_reno(tp))
    		return false;
    
    
    	/* Yeah, we have to make a difficult choice between forward transmission
    	 * and retransmission... Both ways have their merits...
    	 *
    	 * For now we do not retransmit anything, while we have some new
    	 * segments to send. In the other cases, follow rule 3 for
    	 * NextSeg() specified in RFC3517.
    	 */
    
    	if (tcp_may_send_now(sk))
    		return false;
    
    	return true;
    }
    
    /* This gets called after a retransmit timeout, and the initially
     * retransmitted data is acknowledged.  It tries to continue
     * resending the rest of the retransmit queue, until either
     * we've sent it all or the congestion window limit is reached.
     * If doing SACK, the first ACK which comes back for a timeout
     * based retransmit packet might feed us FACK information again.
     * If so, we use it to avoid unnecessary retransmissions.
     */
    void tcp_xmit_retransmit_queue(struct sock *sk)
    {
    
    	const struct inet_connection_sock *icsk = inet_csk(sk);
    
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct sk_buff *skb;
    
    	struct sk_buff *hole = NULL;
    	u32 last_lost;
    
    	int mib_idx;
    
    	int fwd_rexmitting = 0;
    
    	if (!tp->packets_out)
    		return;
    
    
    	if (!tp->lost_out)
    		tp->retransmit_high = tp->snd_una;
    
    
    	if (tp->retransmit_skb_hint) {
    
    		skb = tp->retransmit_skb_hint;
    
    		last_lost = TCP_SKB_CB(skb)->end_seq;
    		if (after(last_lost, tp->retransmit_high))
    			last_lost = tp->retransmit_high;
    	} else {
    
    		skb = tcp_write_queue_head(sk);
    
    		last_lost = tp->snd_una;
    	}
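    
    	/* The walk below handles three cases: segments already marked lost
    	 * are retransmitted, the first un-SACKed and not-yet-retransmitted
    	 * skb is remembered as a "hole" to fall back to, and once the lost
    	 * range is exhausted we may forward-retransmit up to the highest
    	 * SACKed sequence (see tcp_can_forward_retransmit()).
    	 */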
    
    
    
    	tcp_for_write_queue_from(skb, sk) {
    		__u8 sacked = TCP_SKB_CB(skb)->sacked;
    
    
    
    		if (skb == tcp_send_head(sk))
    			break;
    		/* we could do better than to assign each time */
    
    		if (hole == NULL)
    			tp->retransmit_skb_hint = skb;
    
    
    		/* Assume this retransmit will generate
    		 * only one packet for congestion window
    		 * calculation purposes.  This works because
    		 * tcp_retransmit_skb() will chop up the
    		 * packet to be MSS sized and all the
    		 * packet counting works out.
    		 */
    		if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
    			return;
    
    
    
    		if (fwd_rexmitting) {
    begin_fwd:
    			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
    				break;
    			mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
    
    		} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
    
    			tp->retransmit_high = last_lost;
    
    			if (!tcp_can_forward_retransmit(sk))
    				break;
    			/* Backtrack if necessary to non-L'ed skb */
    			if (hole != NULL) {
    				skb = hole;
    				hole = NULL;
    			}
    			fwd_rexmitting = 1;
    			goto begin_fwd;
    
    
    
    		} else if (!(sacked & TCPCB_LOST)) {
    
    			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
    
    				hole = skb;
    			continue;
    
    		} else {
    			last_lost = TCP_SKB_CB(skb)->end_seq;
    
    			if (icsk->icsk_ca_state != TCP_CA_Loss)
    				mib_idx = LINUX_MIB_TCPFASTRETRANS;
    			else
    				mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
    		}
    
    
    
    		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
    			continue;
    
    
    		if (tcp_retransmit_skb(sk, skb)) {
    			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
    			return;
    		}
    		NET_INC_STATS_BH(sock_net(sk), mib_idx);
    
    
    
    		if (tcp_in_cwnd_reduction(sk))
    
    			tp->prr_out += tcp_skb_pcount(skb);
    
    
    		if (skb == tcp_write_queue_head(sk))
    
    			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
    						  inet_csk(sk)->icsk_rto,
    						  TCP_RTO_MAX);
    
    	}
    }
    
    /* Send a fin.  The caller locks the socket for us.  This cannot be
     * allowed to fail queueing a FIN frame under any circumstances.
     */
    void tcp_send_fin(struct sock *sk)
    {
    
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	struct sk_buff *skb = tcp_write_queue_tail(sk);
    
    	int mss_now;
    
    	/* Optimization, tack on the FIN if we have a queue of
    	 * unsent frames.  But be careful about outgoing SACKS
    	 * and IP options.
    	 */
    
    	mss_now = tcp_current_mss(sk);
    
    
    
    	if (tcp_send_head(sk) != NULL) {
    
    		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN;
    
    		TCP_SKB_CB(skb)->end_seq++;
    		tp->write_seq++;
    	} else {
    		/* Socket is locked, keep trying until memory is available. */
    		for (;;) {
    
    			skb = alloc_skb_fclone(MAX_TCP_HEADER,
    					       sk->sk_allocation);
    
    			if (skb)
    				break;
    			yield();
    		}
    
    		/* Reserve space for headers and prepare control bits. */
    		skb_reserve(skb, MAX_TCP_HEADER);
    		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
    
    		tcp_init_nondata_skb(skb, tp->write_seq,
    				     TCPHDR_ACK | TCPHDR_FIN);
    
    		tcp_queue_skb(sk, skb);
    	}
    
    	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
    
    }
    
    /* We get here when a process closes a file descriptor (either due to
     * an explicit close() or as a byproduct of exit()'ing) and there
     * was unread data in the receive queue.  This behavior is recommended
    
     * by RFC 2525, section 2.17.  -DaveM
    
     */
    
    void tcp_send_active_reset(struct sock *sk, gfp_t priority)
    
    {
    	struct sk_buff *skb;
    
    	/* NOTE: No TCP options attached and we never retransmit this. */
    	skb = alloc_skb(MAX_TCP_HEADER, priority);
    	if (!skb) {
    
    		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
    
    		return;
    	}
    
    	/* Reserve space for headers and prepare control bits. */
    	skb_reserve(skb, MAX_TCP_HEADER);
    
    	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
    			     TCPHDR_ACK | TCPHDR_RST);
    
    	/* Send it off. */
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    	if (tcp_transmit_skb(sk, skb, 0, priority))
    
    		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
    
    	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
    }
    
    /* Send a crossed SYN-ACK during socket establishment.
     * WARNING: This routine must only be called when we have already sent
    
     * a SYN packet that crossed the incoming SYN that caused this routine
     * to get called. If this assumption fails then the initial rcv_wnd
     * and rcv_wscale values will not be correct.
     */
    int tcp_send_synack(struct sock *sk)
    {
    
    	struct sk_buff *skb;
    
    
    
    	skb = tcp_write_queue_head(sk);
    
    	if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
    
    		pr_debug("%s: wrong queue state\n", __func__);
    
    		return -EFAULT;
    	}
    
    	if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
    
    		if (skb_cloned(skb)) {
    			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
    			if (nskb == NULL)
    				return -ENOMEM;
    
    			tcp_unlink_write_queue(skb, sk);
    
    			skb_header_release(nskb);
    
    			__tcp_add_write_queue_head(sk, nskb);
    
    			sk_wmem_free_skb(sk, skb);
    			sk->sk_wmem_queued += nskb->truesize;
    			sk_mem_charge(sk, nskb->truesize);
    
    			skb = nskb;
    		}
    
    
    		TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK;
    
    		TCP_ECN_send_synack(tcp_sk(sk), skb);
    	}
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
    }
    
    /**
     * tcp_make_synack - Prepare a SYN-ACK.
     * @sk: listener socket
     * @dst: dst entry attached to the SYNACK
     * @req: request_sock pointer
     * @rvp: request_values pointer
     *
     * Allocate one skb and build a SYNACK packet.
     * @dst is consumed : Caller should not use it again.
     */
    
    struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
    				struct request_sock *req,
    				struct request_values *rvp,
    				struct tcp_fastopen_cookie *foc)
    
    {
    
    	struct tcp_out_options opts;
    
    	struct tcp_extend_values *xvp = tcp_xv(rvp);
    
    	struct inet_request_sock *ireq = inet_rsk(req);
    
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	const struct tcp_cookie_values *cvp = tp->cookie_values;
    
    	struct tcphdr *th;
    	struct sk_buff *skb;
    
    	struct tcp_md5sig_key *md5;
    	int tcp_header_size;
    	int mss;
    	int s_data_desired = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
    		s_data_desired = cvp->s_data_desired;
    
    	skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired,
    			sk_gfp_atomic(sk, GFP_ATOMIC));
    
    	if (unlikely(!skb)) {
    		dst_release(dst);
    
    		return NULL;
    	}
    	/* Reserve space for headers. */
    	skb_reserve(skb, MAX_TCP_HEADER);
    
    
    	skb_dst_set(skb, dst);
    
    
    
    	mss = dst_metric_advmss(dst);
    
    	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
    		mss = tp->rx_opt.user_mss;
    
    
    	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
    		__u8 rcv_wscale;
    		/* Set this up on the first call only */
    		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
    
    
    		/* limit the window selection if the user enforce a smaller rx buffer */
    		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
    		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
    			req->window_clamp = tcp_full_space(sk);
    
    
    		/* tcp_full_space because it is guaranteed to be the first packet */
    		tcp_select_initial_window(tcp_full_space(sk),
    
    			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
    
    			&req->rcv_wnd,
    			&req->window_clamp,
    			ireq->wscale_ok,
    
    			&rcv_wscale,
    			dst_metric(dst, RTAX_INITRWND));
    
    		ireq->rcv_wscale = rcv_wscale;
    	}
    
    	memset(&opts, 0, sizeof(opts));
    
    #ifdef CONFIG_SYN_COOKIES
    	if (unlikely(req->cookie_ts))
    		TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
    	else
    #endif
    
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    	tcp_header_size = tcp_synack_options(sk, req, mss,
    					     skb, &opts, &md5, xvp, foc)
    			+ sizeof(*th);
    
    	skb_push(skb, tcp_header_size);
    	skb_reset_transport_header(skb);
    
    	th = tcp_hdr(skb);
    	memset(th, 0, sizeof(struct tcphdr));
    	th->syn = 1;
    	th->ack = 1;
    	TCP_ECN_make_synack(req, th);
    
    	th->source = ireq->loc_port;
    	th->dest = ireq->rmt_port;
    
    	/* Setting of flags is superfluous here for callers (and ECE is
    	 * not even correctly set)
    	 */
    	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
    			     TCPHDR_SYN | TCPHDR_ACK);
    
    
    	if (OPTION_COOKIE_EXTENSION & opts.options) {
    
    		if (s_data_desired) {
    			u8 *buf = skb_put(skb, s_data_desired);
    
    
    			/* copy data directly from the listening socket. */
    
    			memcpy(buf, cvp->s_data_payload, s_data_desired);
    			TCP_SKB_CB(skb)->end_seq += s_data_desired;
    
    		}
    
    		if (opts.hash_size > 0) {
    			__u32 workspace[SHA_WORKSPACE_WORDS];
    			u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
    			u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
    
    			/* Secret recipe depends on the Timestamp, (future)
    			 * Sequence and Acknowledgment Numbers, Initiator
    			 * Cookie, and others handled by IP variant caller.
    			 */
    			*tail-- ^= opts.tsval;
    			*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
    			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;
    
    			/* recommended */
    
    			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
    
    			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
    
    
    			sha_transform((__u32 *)&xvp->cookie_bakery[0],
    				      (char *)mess,
    				      &workspace[0]);
    			opts.hash_location =
    				(__u8 *)&xvp->cookie_bakery[0];
    		}
    	}
    
    
    	th->seq = htonl(TCP_SKB_CB(skb)->seq);
    
    	/* XXX data is queued and acked as is. No buffer/window check */
    	th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
    
    
    	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
    
    	th->window = htons(min(req->rcv_wnd, 65535U));
    
    	tcp_options_write((__be32 *)(th + 1), tp, &opts);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	th->doff = (tcp_header_size >> 2);
    
    	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
    
    
    #ifdef CONFIG_TCP_MD5SIG
    	/* Okay, we have all we need - do the md5 hash if needed */
    	if (md5) {
    
    		tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
    					       md5, NULL, req, skb);
    	}
    #endif
    
    	return skb;
    }
    
    EXPORT_SYMBOL(tcp_make_synack);
    
    
    
    /* Do all connect socket setups that can be done AF independent. */
    
    void tcp_connect_init(struct sock *sk)
    
    {
    
    	const struct dst_entry *dst = __sk_dst_get(sk);
    
    	struct tcp_sock *tp = tcp_sk(sk);
    	__u8 rcv_wscale;
    
    	/* We'll fix this up when we get a response from the other end.
    	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
    	 */
    	tp->tcp_header_len = sizeof(struct tcphdr) +
    
    		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
    
    
    
    #ifdef CONFIG_TCP_MD5SIG
    	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
    		tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
    #endif
    
    
    	/* If user gave his TCP_MAXSEG, record it to clamp */
    	if (tp->rx_opt.user_mss)
    		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
    	tp->max_window = 0;
    
    	tcp_mtup_init(sk);
    
    	tcp_sync_mss(sk, dst_mtu(dst));
    
    	if (!tp->window_clamp)
    		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
    
    	tp->advmss = dst_metric_advmss(dst);
    
    	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
    		tp->advmss = tp->rx_opt.user_mss;
    
    
    	tcp_initialize_rcv_mss(sk);
    
    
    	/* limit the window selection if the user enforce a smaller rx buffer */
    	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
    	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
    		tp->window_clamp = tcp_full_space(sk);
    
    
    	tcp_select_initial_window(tcp_full_space(sk),
    				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
    				  &tp->rcv_wnd,
    				  &tp->window_clamp,
    
    				  sysctl_tcp_window_scaling,
    
    				  &rcv_wscale,
    				  dst_metric(dst, RTAX_INITRWND));
    
    
    	tp->rx_opt.rcv_wscale = rcv_wscale;
    	tp->rcv_ssthresh = tp->rcv_wnd;
    
    	sk->sk_err = 0;
    	sock_reset_flag(sk, SOCK_DONE);
    	tp->snd_wnd = 0;
    
    	tp->snd_una = tp->write_seq;
    	tp->snd_sml = tp->write_seq;
    
    	tp->snd_up = tp->write_seq;
    
    	tp->snd_nxt = tp->write_seq;
    
    
    	if (likely(!tp->repair))
    		tp->rcv_nxt = 0;
    	tp->rcv_wup = tp->rcv_nxt;
    	tp->copied_seq = tp->rcv_nxt;
    
    
    
    	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
    	inet_csk(sk)->icsk_retransmits = 0;
    
    	tcp_clear_retrans(tp);
    }
    
    
    static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
    
    	tcb->end_seq += skb->len;
    	skb_header_release(skb);
    	__tcp_add_write_queue_tail(sk, skb);
    	sk->sk_wmem_queued += skb->truesize;
    	sk_mem_charge(sk, skb->truesize);
    	tp->write_seq = tcb->end_seq;
    	tp->packets_out += tcp_skb_pcount(skb);
    }
    
    /* Build and send a SYN with data and (cached) Fast Open cookie. However,
     * queue a data-only packet after the regular SYN, such that regular SYNs
     * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
     * only the SYN sequence, the data are retransmitted in the first ACK.
     * If cookie is not cached or other error occurs, falls back to send a
     * regular SYN with Fast Open cookie request option.
     */
    static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct tcp_fastopen_request *fo = tp->fastopen_req;
    
    	int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
    
    	struct sk_buff *syn_data = NULL, *data;
    
    	unsigned long last_syn_loss = 0;
    
    
    	tp->rx_opt.mss_clamp = tp->advmss;  /* If MSS is not cached */
    
    	tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
    			       &syn_loss, &last_syn_loss);
    	/* Recurring FO SYN losses: revert to regular handshake temporarily */
    	if (syn_loss > 1 &&
    	    time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
    		fo->cookie.len = -1;
    		goto fallback;
    	}
    
    	if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
    		fo->cookie.len = -1;
    	else if (fo->cookie.len <= 0)
    
    		goto fallback;
    
    	/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
    	 * user-MSS. Reserve maximum option space for middleboxes that add
    	 * private TCP options. The cost is reduced data space in SYN :(
    	 */
    	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
    		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
    	space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
    		MAX_TCP_OPTION_SPACE;
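    	/* For example, if tcp_mtu_to_mss() yields 1460 here, roughly
    	 * 1460 - MAX_TCP_OPTION_SPACE = 1420 bytes of user data can ride
    	 * in the SYN (illustrative figures only).
    	 */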
    
    	syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
    				   sk->sk_allocation);
    	if (syn_data == NULL)
    		goto fallback;
    
    	for (i = 0; i < iovlen && syn_data->len < space; ++i) {
    		struct iovec *iov = &fo->data->msg_iov[i];
    		unsigned char __user *from = iov->iov_base;
    		int len = iov->iov_len;
    
    		if (syn_data->len + len > space)
    			len = space - syn_data->len;
    		else if (i + 1 == iovlen)
    			/* No more data pending in inet_wait_for_connect() */
    			fo->data = NULL;
    
    		if (skb_add_data(syn_data, from, len))
    			goto fallback;
    	}
    
    	/* Queue a data-only packet after the regular SYN for retransmission */
    	data = pskb_copy(syn_data, sk->sk_allocation);
    	if (data == NULL)
    		goto fallback;
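    	/* The data-only copy starts one sequence number past the SYN (the
    	 * SYN itself consumes one) and carries plain ACK|PSH flags, so it
    	 * can later be retransmitted as ordinary data.
    	 */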
    	TCP_SKB_CB(data)->seq++;
    	TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
    	TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
    	tcp_connect_queue_skb(sk, data);
    	fo->copied = data->len;
    
    	if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
    
    		tp->syn_data = (fo->copied > 0);
    
    		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
    		goto done;
    	}
    	syn_data = NULL;
    
    fallback:
    	/* Send a regular SYN with Fast Open cookie request option */
    	if (fo->cookie.len > 0)
    		fo->cookie.len = 0;
    	err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
    	if (err)
    		tp->syn_fastopen = 0;
    	kfree_skb(syn_data);
    done:
    	fo->cookie.len = -1;  /* Exclude Fast Open option for SYN retries */
    	return err;
    }
    
    
    /* Build a SYN and send it off. */
    
    int tcp_connect(struct sock *sk)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct sk_buff *buff;
    
    
    	tcp_connect_init(sk);
    
    
    	if (unlikely(tp->repair)) {
    		tcp_finish_connect(sk, NULL);
    		return 0;
    	}
    
    
    	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);