tcp_output.c
    			sk_stream_free_skb(sk, skb);
    			sk_charge_skb(sk, nskb);
    			skb = nskb;
    		}
    
    		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
    		TCP_ECN_send_synack(tcp_sk(sk), skb);
    	}
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    	return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
    
    }
    
    /*
     * Prepare a SYN-ACK.
     */
struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
				 struct request_sock *req)
{
	struct inet_request_sock *ireq = inet_rsk(req);
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct tcphdr *th;
    	int tcp_header_size;
    	struct sk_buff *skb;
    
    	skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
    	if (skb == NULL)
    		return NULL;
    
    	/* Reserve space for headers. */
    	skb_reserve(skb, MAX_TCP_HEADER);
    
    	skb->dst = dst_clone(dst);
    
	tcp_header_size = (sizeof(struct tcphdr) + TCPOLEN_MSS +
			   (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0) +
			   (ireq->wscale_ok ? TCPOLEN_WSCALE_ALIGNED : 0) +
			   /* SACK_PERM is in the place of NOP NOP of TS */
			   ((ireq->sack_ok && !ireq->tstamp_ok) ? TCPOLEN_SACKPERM_ALIGNED : 0));
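	/* With timestamps and window scaling negotiated this works out to
	 * 20 (base header) + 4 (MSS) + 12 (timestamps) + 4 (wscale) = 40
	 * bytes; SACK-permitted costs nothing extra in that case because it
	 * takes the place of the timestamp option's NOP padding.
	 */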
    
    	skb->h.th = th = (struct tcphdr *) skb_push(skb, tcp_header_size);
    
    	memset(th, 0, sizeof(struct tcphdr));
    	th->syn = 1;
    	th->ack = 1;
    	TCP_ECN_make_synack(req, th);
    	th->source = inet_sk(sk)->sport;
    
    	th->dest = ireq->rmt_port;
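	/* A SYN-ACK consumes one unit of sequence space, hence
	 * end_seq = seq + 1 below.
	 */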
    	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
    
    	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
    	TCP_SKB_CB(skb)->sacked = 0;
    
    	skb_shinfo(skb)->gso_segs = 1;
    	skb_shinfo(skb)->gso_size = 0;
    	skb_shinfo(skb)->gso_type = 0;
    
    	th->seq = htonl(TCP_SKB_CB(skb)->seq);
    
    	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
    
    	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
    		__u8 rcv_wscale; 
    		/* Set this up on the first call only */
    		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
    		/* tcp_full_space because it is guaranteed to be the first packet */
		tcp_select_initial_window(tcp_full_space(sk), 
			dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
			&req->rcv_wnd,
			&req->window_clamp,
			ireq->wscale_ok,
			&rcv_wscale);
		ireq->rcv_wscale = rcv_wscale;
	}
    
    	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
    	th->window = htons(req->rcv_wnd);
    
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), ireq->tstamp_ok,
			      ireq->sack_ok, ireq->wscale_ok, ireq->rcv_wscale,
    			      TCP_SKB_CB(skb)->when,
    			      req->ts_recent);
    
    	skb->csum = 0;
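	/* th->doff counts 32-bit words; every option length added above is
	 * 4-byte aligned, so the shift is exact.
	 */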
    	th->doff = (tcp_header_size >> 2);
    	TCP_INC_STATS(TCP_MIB_OUTSEGS);
    	return skb;
    }
    
/*
 * Do all connect socket setups that can be done AF independent.
 */
static void tcp_connect_init(struct sock *sk)
{
    	struct dst_entry *dst = __sk_dst_get(sk);
    	struct tcp_sock *tp = tcp_sk(sk);
    	__u8 rcv_wscale;
    
    	/* We'll fix this up when we get a response from the other end.
    	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
    	 */
    	tp->tcp_header_len = sizeof(struct tcphdr) +
    		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
    
    	/* If user gave his TCP_MAXSEG, record it to clamp */
    	if (tp->rx_opt.user_mss)
    		tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
    	tp->max_window = 0;
    
    	tcp_mtup_init(sk);
    
    	tcp_sync_mss(sk, dst_mtu(dst));
    
    	if (!tp->window_clamp)
    		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
    	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
    	tcp_initialize_rcv_mss(sk);
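	/* The MSS handed to tcp_select_initial_window() is reduced by the
	 * option space already reserved in tcp_header_len (the timestamp
	 * option) when a timestamp from this peer has been cached.
	 */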
    
    	tcp_select_initial_window(tcp_full_space(sk),
    				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
    				  &tp->rcv_wnd,
    				  &tp->window_clamp,
    				  sysctl_tcp_window_scaling,
    				  &rcv_wscale);
    
    	tp->rx_opt.rcv_wscale = rcv_wscale;
    	tp->rcv_ssthresh = tp->rcv_wnd;
    
    	sk->sk_err = 0;
    	sock_reset_flag(sk, SOCK_DONE);
    	tp->snd_wnd = 0;
    	tcp_init_wl(tp, tp->write_seq, 0);
    	tp->snd_una = tp->write_seq;
    	tp->snd_sml = tp->write_seq;
    	tp->rcv_nxt = 0;
    	tp->rcv_wup = 0;
    	tp->copied_seq = 0;
    
    
    	inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
    	inet_csk(sk)->icsk_retransmits = 0;
    
    	tcp_clear_retrans(tp);
    }
    
    /*
     * Build a SYN and send it off.
     */ 
    int tcp_connect(struct sock *sk)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct sk_buff *buff;
    
    	tcp_connect_init(sk);
    
    
    	buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
    
    	if (unlikely(buff == NULL))
    		return -ENOBUFS;
    
    	/* Reserve space for headers. */
    	skb_reserve(buff, MAX_TCP_HEADER);
    
    	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
    	TCP_ECN_send_syn(sk, tp, buff);
    	TCP_SKB_CB(buff)->sacked = 0;
    
    	skb_shinfo(buff)->gso_segs = 1;
    	skb_shinfo(buff)->gso_size = 0;
    	skb_shinfo(buff)->gso_type = 0;
    
    	buff->csum = 0;
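	/* The SYN consumes one sequence number, so end_seq ends up one past
	 * seq and snd_nxt starts just after the SYN.
	 */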
    	TCP_SKB_CB(buff)->seq = tp->write_seq++;
    	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
    	tp->snd_nxt = tp->write_seq;
    	tp->pushed_seq = tp->write_seq;
    
    	/* Send it off. */
    	TCP_SKB_CB(buff)->when = tcp_time_stamp;
    	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
    	skb_header_release(buff);
    	__skb_queue_tail(&sk->sk_write_queue, buff);
    	sk_charge_skb(sk, buff);
    	tp->packets_out += tcp_skb_pcount(buff);
    
    	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
    
    	TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
    
    	/* Timer for repeating the SYN until an answer. */
    
    	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
    				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
    
    	return 0;
    }
    
    /* Send out a delayed ack, the caller does the policy checking
     * to see if we should even be here.  See tcp_input.c:tcp_ack_snd_check()
     * for details.
     */
    void tcp_send_delayed_ack(struct sock *sk)
    {
    
    	struct inet_connection_sock *icsk = inet_csk(sk);
    	int ato = icsk->icsk_ack.ato;
    
    	unsigned long timeout;
    
    	if (ato > TCP_DELACK_MIN) {
    
    		const struct tcp_sock *tp = tcp_sk(sk);
    
    		int max_ato = HZ/2;
    
    
		if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
			max_ato = TCP_DELACK_MAX;
    
    		/* Slow path, intersegment interval is "high". */
    
		/* If some rtt estimate is known, use it to bound delayed ack.
		 * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
		 * directly.
		 */
    		if (tp->srtt) {
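			/* srtt is kept scaled by 8, so >> 3 yields the
			 * smoothed RTT in jiffies.
			 */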
    			int rtt = max(tp->srtt>>3, TCP_DELACK_MIN);
    
    			if (rtt < max_ato)
    				max_ato = rtt;
    		}
    
    		ato = min(ato, max_ato);
    	}
    
    	/* Stay within the limit we were given */
    	timeout = jiffies + ato;
    
	/* Use new timeout only if there wasn't an older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
    
    		/* If delack timer was blocked or is about to expire,
    		 * send ACK now.
    		 */
    
    		if (icsk->icsk_ack.blocked ||
    		    time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
    
    			tcp_send_ack(sk);
    			return;
    		}
    
    
    		if (!time_before(timeout, icsk->icsk_ack.timeout))
    			timeout = icsk->icsk_ack.timeout;
    
    	}
    
    	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
    	icsk->icsk_ack.timeout = timeout;
    	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
    
    }
    
    /* This routine sends an ack and also updates the window. */
    void tcp_send_ack(struct sock *sk)
    {
    	/* If we have been reset, we may not send again. */
    	if (sk->sk_state != TCP_CLOSE) {
    		struct tcp_sock *tp = tcp_sk(sk);
    		struct sk_buff *buff;
    
    		/* We are not putting this on the write queue, so
    		 * tcp_transmit_skb() will set the ownership to this
    		 * sock.
    		 */
    		buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
    		if (buff == NULL) {
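			/* Allocation failed: fall back to the delayed-ACK
			 * timer so an ACK still gets sent once memory
			 * pressure eases.
			 */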
    
    			inet_csk_schedule_ack(sk);
    			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
    
    			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
    						  TCP_DELACK_MAX, TCP_RTO_MAX);
    
    			return;
    		}
    
    		/* Reserve space for headers and prepare control bits. */
    		skb_reserve(buff, MAX_TCP_HEADER);
    		buff->csum = 0;
    		TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
    		TCP_SKB_CB(buff)->sacked = 0;
    
    		skb_shinfo(buff)->gso_segs = 1;
    		skb_shinfo(buff)->gso_size = 0;
    		skb_shinfo(buff)->gso_type = 0;
    
    
    		/* Send it off, this clears delayed acks for us. */
    		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
    		TCP_SKB_CB(buff)->when = tcp_time_stamp;
    
    		tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
    
    	}
    }
    
    /* This routine sends a packet with an out of date sequence
     * number. It assumes the other end will try to ack it.
     *
 * Question: what should we do while in urgent mode?
 * 4.4BSD forces sending a single byte of data. We cannot send
     * out of window data, because we have SND.NXT==SND.MAX...
     *
     * Current solution: to send TWO zero-length segments in urgent mode:
     * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
     * out-of-date with SND.UNA-1 to probe window.
     */
    static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct sk_buff *skb;
    
    	/* We don't queue it, tcp_transmit_skb() sets ownership. */
    	skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
    	if (skb == NULL) 
    		return -1;
    
    	/* Reserve space for headers and set control bits. */
    	skb_reserve(skb, MAX_TCP_HEADER);
    	skb->csum = 0;
    	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
    	TCP_SKB_CB(skb)->sacked = urgent;
    
    	skb_shinfo(skb)->gso_segs = 1;
    	skb_shinfo(skb)->gso_size = 0;
    	skb_shinfo(skb)->gso_type = 0;
    
    
    	/* Use a previous sequence.  This should cause the other
    	 * end to send an ack.  Don't queue or clone SKB, just
    	 * send it.
    	 */
    	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
    	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
    	TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
    
    }
    
    int tcp_write_wakeup(struct sock *sk)
    {
    	if (sk->sk_state != TCP_CLOSE) {
    		struct tcp_sock *tp = tcp_sk(sk);
    		struct sk_buff *skb;
    
    		if ((skb = sk->sk_send_head) != NULL &&
    		    before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
    			int err;
    			unsigned int mss = tcp_current_mss(sk, 0);
    			unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
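			/* seg_size is how much of the peer's advertised
			 * window remains from this skb's starting sequence.
			 */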
    
    			if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
    				tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
    
			/* We are probing the opening of a window
			 * but the window size is != 0; this must have been
			 * the result of SWS avoidance (sender side).
			 */
    			if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
    			    skb->len > mss) {
    				seg_size = min(seg_size, mss);
    				TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
    
				if (tcp_fragment(sk, skb, seg_size, mss))
					return -1;
			} else if (!tcp_skb_pcount(skb))
				tcp_set_skb_tso_segs(sk, skb, mss);
    
    
    			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
    			TCP_SKB_CB(skb)->when = tcp_time_stamp;
    
    			err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
    
    			if (!err) {
    				update_send_head(sk, tp, skb);
    			}
    			return err;
    		} else {
    			if (tp->urg_mode &&
    			    between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF))
    				tcp_xmit_probe_skb(sk, TCPCB_URG);
    			return tcp_xmit_probe_skb(sk, 0);
    		}
    	}
    	return -1;
    }
    
/* A window probe timeout has occurred.  If the window is not closed,
 * send a partial packet, else a zero probe.
     */
    void tcp_send_probe0(struct sock *sk)
    {
    
    	struct inet_connection_sock *icsk = inet_csk(sk);
    
    	struct tcp_sock *tp = tcp_sk(sk);
    	int err;
    
    	err = tcp_write_wakeup(sk);
    
	if (tp->packets_out || !sk->sk_send_head) {
		/* Cancel probe timer, if it is not required. */
		icsk->icsk_probes_out = 0;
		icsk->icsk_backoff = 0;
		return;
	}
    
	if (err <= 0) {
		if (icsk->icsk_backoff < sysctl_tcp_retries2)
			icsk->icsk_backoff++;
		icsk->icsk_probes_out++;
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 
					  min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
					  TCP_RTO_MAX);
    
    	} else {
		/* If the packet was not sent due to local congestion,
		 * do not back off and do not remember icsk_probes_out.
		 * Let local senders fight for local resources.
		 *
		 * Still use the accumulated backoff, though.
		 */
    
    		if (!icsk->icsk_probes_out)
    			icsk->icsk_probes_out = 1;
    
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, 
					  min(icsk->icsk_rto << icsk->icsk_backoff,
					      TCP_RESOURCE_PROBE_INTERVAL),
					  TCP_RTO_MAX);
	}
    }
    
    EXPORT_SYMBOL(tcp_connect);
    EXPORT_SYMBOL(tcp_make_synack);
    EXPORT_SYMBOL(tcp_simple_retransmit);
    EXPORT_SYMBOL(tcp_sync_mss);
    
    EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
    
    EXPORT_SYMBOL(tcp_mtup_init);