Newer
Older
return 0;
if (skb == tcp_send_head(sk))
return 0;
/* Some heurestics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return 0;
return 1;
}
/* Collapse packets in the retransmit queue to make to create
* less packets on the wire. This is only done on retransmission.
*/
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
int space)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = to, *tmp;
int first = 1;
if (!sysctl_tcp_retrans_collapse)
return;
if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
return;
tcp_for_write_queue_from_safe(skb, tmp, sk) {
if (!tcp_can_collapse(sk, skb))
break;
space -= skb->len;
if (first) {
first = 0;
continue;
}
if (space < 0)
break;
/* Punt if not enough space exists in the first SKB for
* the data in the second
*/
if (skb->len > skb_tailroom(to))
break;
if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
break;
tcp_collapse_retrans(sk, to);
}
}
/* This retransmits one SKB. Policy decisions and retransmit queue
* state updates are done by the caller. Returns non-zero if an
* error occurred which prevented the send.
*/
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int cur_mss;
/* Inconslusive MTU probe */
if (icsk->icsk_mtup.probe_size) {
icsk->icsk_mtup.probe_size = 0;
}
/* Do not sent more than we queued. 1/4 is reserved for possible
* copying overhead: fragmentation, tunneling, mangling etc.
*/
if (atomic_read(&sk->sk_wmem_alloc) >
min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
return -EAGAIN;
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
BUG();
if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
return -ENOMEM;
}
if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
return -EHOSTUNREACH; /* Routing failure or similar. */
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
* case, when window is shrunk to zero. In this case
* our retransmit serves as a zero window probe.
*/
if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
TCP_SKB_CB(skb)->seq != tp->snd_una)
if (tcp_fragment(sk, skb, cur_mss, cur_mss))
int oldpcount = tcp_skb_pcount(skb);
if (unlikely(oldpcount > 1)) {
tcp_init_tso_segs(sk, skb, cur_mss);
tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
}
tcp_retrans_try_collapse(sk, skb, cur_mss);
/* Some Solaris stacks overoptimize and ignore the FIN on a
* retransmit when old data is attached. So strip it off
* since it is cheap to do so and saves bytes on the network.
*/
if (skb->len > 0 &&
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
/* Reuse, even though it does some unnecessary work */
tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
TCP_SKB_CB(skb)->flags);
skb->ip_summed = CHECKSUM_NONE;
}
}
/* Make a copy, if the first transmission SKB clone we made
* is still in somebody's hands, else make a clone.
*/
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (err == 0) {
/* Update global TCP statistics. */
TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
tp->total_retrans++;
#if FASTRETRANS_DEBUG > 0
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
if (net_ratelimit())
printk(KERN_DEBUG "retrans_out leaked.\n");
}
#endif
if (!tp->retrans_out)
tp->lost_retrans_low = tp->snd_nxt;
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
tp->retrans_out += tcp_skb_pcount(skb);
/* Save stamp of the first retransmit. */
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
tp->undo_retrans++;
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
TCP_SKB_CB(skb)->ack_seq = tp->snd_nxt;
}
return err;
}
/* Check if we forward retransmits are possible in the current
* window/congestion state.
*/
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
static int tcp_can_forward_retransmit(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Forward retransmissions are possible only during Recovery. */
if (icsk->icsk_ca_state != TCP_CA_Recovery)
return 0;
/* No forward retransmissions in Reno are possible. */
if (tcp_is_reno(tp))
return 0;
/* Yeah, we have to make difficult choice between forward transmission
* and retransmission... Both ways have their merits...
*
* For now we do not retransmit anything, while we have some new
* segments to send. In the other cases, follow rule 3 for
* NextSeg() specified in RFC3517.
*/
if (tcp_may_send_now(sk))
return 0;
return 1;
}
/* This gets called after a retransmit timeout, and the initially
* retransmitted data is acknowledged. It tries to continue
* resending the rest of the retransmit queue, until either
* we've sent it all or the congestion window limit is reached.
* If doing SACK, the first ACK which comes back for a timeout
* based retransmit packet might feed us FACK information again.
* If so, we use it to avoid unnecessarily retransmissions.
*/
void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
if (!tp->lost_out)
tp->retransmit_high = tp->snd_una;
if (tp->retransmit_skb_hint) {
last_lost = TCP_SKB_CB(skb)->end_seq;
if (after(last_lost, tp->retransmit_high))
last_lost = tp->retransmit_high;
} else {
skb = tcp_write_queue_head(sk);
last_lost = tp->snd_una;
}
tcp_for_write_queue_from(skb, sk) {
__u8 sacked = TCP_SKB_CB(skb)->sacked;
if (skb == tcp_send_head(sk))
break;
/* we could do better than to assign each time */
if (hole == NULL)
tp->retransmit_skb_hint = skb;
/* Assume this retransmit will generate
* only one packet for congestion window
* calculation purposes. This works because
* tcp_retransmit_skb() will chop up the
* packet to be MSS sized and all the
* packet counting works out.
*/
if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
return;
if (fwd_rexmitting) {
begin_fwd:
if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
break;
mib_idx = LINUX_MIB_TCPFORWARDRETRANS;
} else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) {
tp->retransmit_high = last_lost;
if (!tcp_can_forward_retransmit(sk))
break;
/* Backtrack if necessary to non-L'ed skb */
if (hole != NULL) {
skb = hole;
hole = NULL;
}
fwd_rexmitting = 1;
goto begin_fwd;
} else if (!(sacked & TCPCB_LOST)) {
if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
last_lost = TCP_SKB_CB(skb)->end_seq;
if (icsk->icsk_ca_state != TCP_CA_Loss)
mib_idx = LINUX_MIB_TCPFASTRETRANS;
else
mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS;
}
if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
if (tcp_retransmit_skb(sk, skb))
return;
NET_INC_STATS_BH(sock_net(sk), mib_idx);
if (skb == tcp_write_queue_head(sk))

Arnaldo Carvalho de Melo
committed
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
TCP_RTO_MAX);
}
}
/* Send a fin. The caller locks the socket for us. This cannot be
* allowed to fail queueing a FIN frame under any circumstances.
*/
void tcp_send_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = tcp_write_queue_tail(sk);
/* Optimization, tack on the FIN if we have a queue of
* unsent frames. But be careful about outgoing SACKS
* and IP options.
*/
if (tcp_send_head(sk) != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
TCP_SKB_CB(skb)->end_seq++;
tp->write_seq++;
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
skb = alloc_skb_fclone(MAX_TCP_HEADER,
sk->sk_allocation);
if (skb)
break;
yield();
}
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
}
/* We get here when a process closes a file descriptor (either due to
* an explicit close() or as a byproduct of exit()'ing) and there
* was unread data in the receive queue. This behavior is recommended
* by RFC 2525, section 2.17. -DaveM
void tcp_send_active_reset(struct sock *sk, gfp_t priority)
{
struct sk_buff *skb;
/* NOTE: No TCP options attached and we never retransmit this. */
skb = alloc_skb(MAX_TCP_HEADER, priority);
if (!skb) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
return;
}
/* Reserve space for headers and prepare control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
/* Send it off. */
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTRSTS);
/* Send a crossed SYN-ACK during socket establishment.
* WARNING: This routine must only be called when we have already sent
* a SYN packet that crossed the incoming SYN that caused this routine
* to get called. If this assumption fails then the initial rcv_wnd
* and rcv_wscale values will not be correct.
*/
int tcp_send_synack(struct sock *sk)
{
skb = tcp_write_queue_head(sk);
if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
return -EFAULT;
}
if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
if (skb_cloned(skb)) {
struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
if (nskb == NULL)
return -ENOMEM;
tcp_unlink_write_queue(skb, sk);
__tcp_add_write_queue_head(sk, nskb);
sk_wmem_free_skb(sk, skb);
sk->sk_wmem_queued += nskb->truesize;
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
}
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
TCP_ECN_send_synack(tcp_sk(sk), skb);
}
TCP_SKB_CB(skb)->when = tcp_time_stamp;
return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
/* Prepare a SYN-ACK. */
struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct request_values *rvp)
struct tcp_out_options opts;
struct tcp_extend_values *xvp = tcp_xv(rvp);

Arnaldo Carvalho de Melo
committed
struct inet_request_sock *ireq = inet_rsk(req);
const struct tcp_cookie_values *cvp = tp->cookie_values;
struct tcp_md5sig_key *md5;
int tcp_header_size;
if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
s_data_desired = cvp->s_data_desired;
skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
if (skb == NULL)
return NULL;
/* Reserve space for headers. */
skb_reserve(skb, MAX_TCP_HEADER);
mss = dst_metric(dst, RTAX_ADVMSS);
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
mss = tp->rx_opt.user_mss;
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
__u8 rcv_wscale;
/* Set this up on the first call only */
req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
/* tcp_full_space because it is guaranteed to be the first packet */
tcp_select_initial_window(tcp_full_space(sk),
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
&req->rcv_wnd,
&req->window_clamp,
ireq->wscale_ok,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
}
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
TCP_SKB_CB(skb)->when = cookie_init_timestamp(req);
else
#endif
tcp_header_size = tcp_synack_options(sk, req, mss,
skb, &opts, &md5, xvp)
+ sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
th = tcp_hdr(skb);
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
TCP_ECN_make_synack(req, th);

Arnaldo Carvalho de Melo
committed
th->dest = ireq->rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
* not even correctly set)
*/
tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
if (OPTION_COOKIE_EXTENSION & opts.options) {
if (s_data_desired) {
u8 *buf = skb_put(skb, s_data_desired);
/* copy data directly from the listening socket. */
memcpy(buf, cvp->s_data_payload, s_data_desired);
TCP_SKB_CB(skb)->end_seq += s_data_desired;
}
if (opts.hash_size > 0) {
__u32 workspace[SHA_WORKSPACE_WORDS];
u32 *mess = &xvp->cookie_bakery[COOKIE_DIGEST_WORDS];
u32 *tail = &mess[COOKIE_MESSAGE_WORDS-1];
/* Secret recipe depends on the Timestamp, (future)
* Sequence and Acknowledgment Numbers, Initiator
* Cookie, and others handled by IP variant caller.
*/
*tail-- ^= opts.tsval;
*tail-- ^= tcp_rsk(req)->rcv_isn + 1;
*tail-- ^= TCP_SKB_CB(skb)->seq + 1;
/* recommended */
*tail-- ^= ((th->dest << 16) | th->source);
*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
sha_transform((__u32 *)&xvp->cookie_bakery[0],
(char *)mess,
&workspace[0]);
opts.hash_location =
(__u8 *)&xvp->cookie_bakery[0];
}
}

Arnaldo Carvalho de Melo
committed
th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), tp, &opts);
TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
if (md5) {
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
/* Do all connect socket setups that can be done AF independent. */
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
__u8 rcv_wscale;
/* We'll fix this up when we get a response from the other end.
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr) +
(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
#ifdef CONFIG_TCP_MD5SIG
if (tp->af_specific->md5_lookup(sk, sk) != NULL)
tp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
/* If user gave his TCP_MAXSEG, record it to clamp */
if (tp->rx_opt.user_mss)
tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
tp->max_window = 0;
tcp_sync_mss(sk, dst_mtu(dst));
if (!tp->window_clamp)
tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
tp->advmss = dst_metric(dst, RTAX_ADVMSS);
if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
tp->advmss = tp->rx_opt.user_mss;
tcp_initialize_rcv_mss(sk);
tcp_select_initial_window(tcp_full_space(sk),
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
sysctl_tcp_window_scaling,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
sk->sk_err = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->snd_wnd = 0;
tp->snd_una = tp->write_seq;
tp->snd_sml = tp->write_seq;
tp->rcv_nxt = 0;
tp->rcv_wup = 0;
tp->copied_seq = 0;
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
inet_csk(sk)->icsk_retransmits = 0;
/* Build a SYN and send it off. */
int tcp_connect(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
tcp_connect_init(sk);
buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
if (unlikely(buff == NULL))
return -ENOBUFS;
/* Reserve space for headers. */
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
TCP_ECN_send_syn(sk, buff);
/* Send it off. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
skb_header_release(buff);
__tcp_add_write_queue_tail(sk, buff);
sk->sk_wmem_queued += buff->truesize;
sk_mem_charge(sk, buff->truesize);
tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
/* We change tp->snd_nxt after the tcp_transmit_skb() call
* in order to make this packet get counted in tcpOutSegs.
*/
tp->snd_nxt = tp->write_seq;
tp->pushed_seq = tp->write_seq;
TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
/* Timer for repeating the SYN until an answer. */

Arnaldo Carvalho de Melo
committed
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
return 0;
}
/* Send out a delayed ack, the caller does the policy checking
* to see if we should even be here. See tcp_input.c:tcp_ack_snd_check()
* for details.
*/
void tcp_send_delayed_ack(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int ato = icsk->icsk_ack.ato;
unsigned long timeout;
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
if (icsk->icsk_ack.pingpong ||
(icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
max_ato = TCP_DELACK_MAX;
/* Slow path, intersegment interval is "high". */
/* If some rtt estimate is known, use it to bound delayed ack.
* Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
if (rtt < max_ato)
max_ato = rtt;
}
ato = min(ato, max_ato);
}
/* Stay within the limit we were given */
timeout = jiffies + ato;
/* Use new timeout only if there wasn't a older one earlier. */
if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
/* If delack timer was blocked or is about to expire,
* send ACK now.
*/
if (icsk->icsk_ack.blocked ||
time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {
if (!time_before(timeout, icsk->icsk_ack.timeout))
timeout = icsk->icsk_ack.timeout;
icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
icsk->icsk_ack.timeout = timeout;
sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}
/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
{
struct sk_buff *buff;
/* If we have been reset, we may not send again. */
if (sk->sk_state == TCP_CLOSE)
return;
/* We are not putting this on the write queue, so
* tcp_transmit_skb() will set the ownership to this
* sock.
*/
buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (buff == NULL) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
return;
/* Reserve space for headers and prepare control bits. */
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->when = tcp_time_stamp;
tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
}
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.
*
* Question: what should we make while urgent mode?
* 4.4BSD forces sending single byte of data. We cannot send
* out of window data, because we have SND.NXT==SND.MAX...
*
* Current solution: to send TWO zero-length segments in urgent mode:
* one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
* out-of-date with SND.UNA-1 to probe window.
*/
static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
/* We don't queue it, tcp_transmit_skb() sets ownership. */
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
return -1;
/* Reserve space for headers and set control bits. */
skb_reserve(skb, MAX_TCP_HEADER);
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
* send it.
*/
tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
/* Initiate keepalive or window probe from timer. */
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
if (sk->sk_state == TCP_CLOSE)
return -1;
if ((skb = tcp_send_head(sk)) != NULL &&
before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
int err;
unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
/* We are probing the opening of a window
* but the window size is != 0
* must have been a result SWS avoidance ( sender )
*/
if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
skb->len > mss) {
seg_size = min(seg_size, mss);
if (tcp_fragment(sk, skb, seg_size, mss))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb, mss);
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
if (!err)
tcp_event_new_data_sent(sk, skb);
return err;
} else {
if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
tcp_xmit_probe_skb(sk, 1);
return tcp_xmit_probe_skb(sk, 0);
}
}
/* A window probe timeout has occurred. If window is not closed send
* a partial packet else a zero probe.
*/
void tcp_send_probe0(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int err;
err = tcp_write_wakeup(sk);
if (tp->packets_out || !tcp_send_head(sk)) {
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
if (icsk->icsk_backoff < sysctl_tcp_retries2)
icsk->icsk_backoff++;
icsk->icsk_probes_out++;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,

Arnaldo Carvalho de Melo
committed
min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX),
TCP_RTO_MAX);
} else {
/* If packet was not sent due to local congestion,
* do not backoff and do not remember icsk_probes_out.
* Let local senders to fight for local resources.
*
* Use accumulated backoff yet.
*/
if (!icsk->icsk_probes_out)
icsk->icsk_probes_out = 1;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
min(icsk->icsk_rto << icsk->icsk_backoff,

Arnaldo Carvalho de Melo
committed
TCP_RESOURCE_PROBE_INTERVAL),
TCP_RTO_MAX);
EXPORT_SYMBOL(tcp_select_initial_window);
EXPORT_SYMBOL(tcp_connect);
EXPORT_SYMBOL(tcp_make_synack);
EXPORT_SYMBOL(tcp_simple_retransmit);
EXPORT_SYMBOL(tcp_sync_mss);