Newer
Older
/* All the bits are present. Drop. */
__kfree_skb(skb);
tcp_dsack_set(tp, seq, end_seq);
goto add_sack;
}
if (after(seq, TCP_SKB_CB(skb1)->seq)) {
/* Partial overlap. */
tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
} else {
skb1 = skb1->prev;
}
}
__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
/* And clean segments covered by new one as whole. */
while ((skb1 = skb->next) !=
(struct sk_buff*)&tp->out_of_order_queue &&
after(end_seq, TCP_SKB_CB(skb1)->seq)) {
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
break;
}
tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
__kfree_skb(skb1);
}
add_sack:
if (tcp_is_sack(tp))
tcp_sack_new_ofo_skb(sk, seq, end_seq);
}
}
/* Collapse contiguous sequence of skbs head..tail with
* sequence numbers start..end.
* Segments with FIN/SYN are not collapsed (only because this
* simplifies code)
*/
/* @list is the queue that owns the skbs in [head, tail); @tail itself is
 * only compared against and never dereferenced.  Skbs that end up fully
 * covered are unlinked from @list and freed, and every replacement skb is
 * linked into @list in front of the skb it starts copying from.
 *
 * The function returns early, without reporting anything to the caller,
 * when a replacement skb cannot be allocated, when the headroom is too
 * large to fit a buffer, or when a SYN/FIN segment is reached.
 */
static void
tcp_collapse(struct sock *sk, struct sk_buff_head *list,
	     struct sk_buff *head, struct sk_buff *tail,
	     u32 start, u32 end)
{
	struct sk_buff *skb;

	/* First, check that queue is collapsible and find
	 * the point where collapsing can be useful. */
	for (skb = head; skb != tail; ) {
		/* No new bits? It is possible on ofo queue. */
		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
			struct sk_buff *next = skb->next;

			/* Take the skb off the queue before freeing it so
			 * that its neighbours never point at freed memory.
			 */
			__skb_unlink(skb, list);
			__kfree_skb(skb);
			NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
			skb = next;
			continue;
		}

		/* The first skb to collapse is:
		 * - not SYN/FIN and
		 * - bloated or contains data before "start" or
		 *   overlaps to the next one.
		 */
		if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
		    (tcp_win_from_space(skb->truesize) > skb->len ||
		     before(TCP_SKB_CB(skb)->seq, start) ||
		     (skb->next != tail &&
		      TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
			break;

		/* Decided to skip this, advance start seq. */
		start = TCP_SKB_CB(skb)->end_seq;
		skb = skb->next;
	}
	if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
		return;

	while (before(start, end)) {
		struct sk_buff *nskb;
		unsigned int header = skb_headroom(skb);
		int copy = SKB_MAX_ORDER(header, 0);

		/* Too big header? This can happen with IPv6. */
		if (copy < 0)
			return;
		if (end-start < copy)
			copy = end-start;
		nskb = alloc_skb(copy+header, GFP_ATOMIC);
		if (!nskb)
			return;

		/* Mirror the header layout and control block of the skb
		 * we start copying from.
		 */
		skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
		skb_set_network_header(nskb, (skb_network_header(skb) -
					      skb->head));
		skb_set_transport_header(nskb, (skb_transport_header(skb) -
						skb->head));
		skb_reserve(nskb, header);
		memcpy(nskb->head, skb->head, header);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
		/* Queue the replacement right before the skb it replaces;
		 * without this the copied data would be lost.
		 */
		__skb_insert(nskb, skb->prev, skb, list);
		sk_stream_set_owner_r(nskb, sk);

		/* Copy data, releasing collapsed skbs. */
		while (copy > 0) {
			int offset = start - TCP_SKB_CB(skb)->seq;
			int size = TCP_SKB_CB(skb)->end_seq - start;

			BUG_ON(offset < 0);
			if (size > 0) {
				size = min(copy, size);
				if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
					BUG();
				TCP_SKB_CB(nskb)->end_seq += size;
				copy -= size;
				start += size;
			}
			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
				struct sk_buff *next = skb->next;

				/* Source skb is fully consumed. */
				__skb_unlink(skb, list);
				__kfree_skb(skb);
				NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
				skb = next;
				if (skb == tail ||
				    tcp_hdr(skb)->syn ||
				    tcp_hdr(skb)->fin)
					return;
			}
		}
	}
}
/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
* and tcp_collapse() them until all the queue is collapsed.
*/
static void tcp_collapse_ofo_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
	struct sk_buff *head;
	u32 start, end;

	/* Nothing queued out of order: nothing to collapse. */
	if (skb == NULL)
		return;

	/* [start, end) tracks the sequence range covered by the current
	 * run of skbs, which begins at head.
	 */
	start = TCP_SKB_CB(skb)->seq;
	end = TCP_SKB_CB(skb)->end_seq;
	head = skb;

	for (;;) {
		skb = skb->next;

		/* Segment is terminated when we see gap or when
		 * we are at the end of all the queue. */
		if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
		    after(TCP_SKB_CB(skb)->seq, end) ||
		    before(TCP_SKB_CB(skb)->end_seq, start)) {
			tcp_collapse(sk, &tp->out_of_order_queue,
				     head, skb, start, end);
			head = skb;
			if (skb == (struct sk_buff *)&tp->out_of_order_queue)
				break;
			/* Start new segment */
			start = TCP_SKB_CB(skb)->seq;
			end = TCP_SKB_CB(skb)->end_seq;
		} else {
			/* skb touches or overlaps the run: widen the range. */
			if (before(TCP_SKB_CB(skb)->seq, start))
				start = TCP_SKB_CB(skb)->seq;
			if (after(TCP_SKB_CB(skb)->end_seq, end))
				end = TCP_SKB_CB(skb)->end_seq;
		}
	}
}
/* Reduce allocated memory if we can, trying to get
* the socket within its memory limits again.
*
* Return less than zero if we should start dropping frames
* until the socket owning process reads some of the data
* to stabilize the situation.
*/
static int tcp_prune_queue(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);

	NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		tcp_clamp_window(sk);
	else if (tcp_memory_pressure)
		tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);

	/* Non-destructive step: collapse both queues, then reclaim. */
	tcp_collapse_ofo_queue(sk);
	tcp_collapse(sk, &sk->sk_receive_queue,
		     sk->sk_receive_queue.next,
		     (struct sk_buff*)&sk->sk_receive_queue,
		     tp->copied_seq, tp->rcv_nxt);
	sk_stream_mem_reclaim(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* Collapsing did not help, destructive actions follow.
	 * This must not ever occur. */

	/* First, purge the out_of_order queue. */
	if (!skb_queue_empty(&tp->out_of_order_queue)) {
		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
		__skb_queue_purge(&tp->out_of_order_queue);

		/* Reset SACK state.  A conforming SACK implementation will
		 * do the same at a timeout based retransmit.  When a connection
		 * is in a sad state like this, we care only about integrity
		 * of the connection not performance.
		 */
		if (tcp_is_sack(tp))
			tcp_sack_reset(&tp->rx_opt);
		sk_stream_mem_reclaim(sk);
	}

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
		return 0;

	/* If we are really being abused, tell the caller to silently
	 * drop receive data on the floor.  It will get retransmitted
	 * and hopefully then we'll have sufficient space.
	 */
	NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);

	/* Massive buffer overcommit. */
	tp->pred_flags = 0;
	return -1;
}
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
* As additional protections, we do not touch cwnd in retransmission phases,
* and if application hit its sndbuf limit recently.
*/
void tcp_cwnd_application_limited(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
/* Limited by application or receiver window. */
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
}
tp->snd_cwnd_used = 0;
}
tp->snd_cwnd_stamp = tcp_time_stamp;
}
static int tcp_should_expand_sndbuf(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
/* If the user specified a specific send buffer setting, do
* not modify it.
*/
if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
return 0;
/* If we are under global TCP memory pressure, do not expand. */
if (tcp_memory_pressure)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
if (tp->packets_out >= tp->snd_cwnd)
return 0;
return 1;
}
/* When incoming ACK allowed to free some skb from write_queue,
* we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
* on the exit from tcp input handler.
*
* PROBLEM: sndbuf expansion does not work well with largesend.
*/
static void tcp_new_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tcp_should_expand_sndbuf(sk)) {
		/* Memory charged per segment: the larger of the clamped
		 * and cached MSS plus header room and skb overhead.
		 */
		int per_seg = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
			      MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
		/* Number of segments we want room for. */
		int nsegs = max_t(unsigned int, tp->snd_cwnd,
				  tp->reordering + 1);
		int wanted = per_seg;

		wanted *= 2*nsegs;

		/* Grow only, and never beyond sysctl_tcp_wmem[2]. */
		if (wanted > sk->sk_sndbuf)
			sk->sk_sndbuf = min(wanted, sysctl_tcp_wmem[2]);
		tp->snd_cwnd_stamp = tcp_time_stamp;
	}

	/* Invoke the socket's write-space callback unconditionally. */
	sk->sk_write_space(sk);
}
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_new_space(sk);
}
}
/* Push any pending frames, then run the write-space check. */
static inline void tcp_data_snd_check(struct sock *sk)
{
	tcp_push_pending_frames(sk);
	tcp_check_space(sk);
}
/*
* Check if sending an ack is needed.
*/
static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
struct tcp_sock *tp = tcp_sk(sk);
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
/* ... and right edge of window advances far enough.
* (tcp_recvmsg() will send ACK otherwise). Or...
*/
&& __tcp_select_window(sk) >= tp->rcv_wnd) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* We have out of order data. */
(ofo_possible &&
skb_peek(&tp->out_of_order_queue))) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
/* Else, send delayed ack. */
tcp_send_delayed_ack(sk);
}
}
/* Run the ACK decision only when an ACK is still scheduled for @sk. */
static inline void tcp_ack_snd_check(struct sock *sk)
{
	if (!inet_csk_ack_scheduled(sk)) {
		/* We sent a data segment already. */
		return;
	}
	__tcp_ack_snd_check(sk, 1);
}
/*
* This routine is only called when we have urgent data
* signaled. Its the 'slow' part of tcp_urg. It could be
* moved inline now as tcp_urg is only called from one
* place. We handle URGent data wrong. We have to - as
* BSD still doesn't use the correction from RFC961.
* For 1003.1g we should support a new option TCP_STDURG to permit
* either form (or just set the sysctl tcp_stdurg).
*/
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
4383
4384
4385
4386
4387
4388
4389
4390
4391
4392
4393
4394
4395
4396
4397
4398
4399
4400
4401
4402
4403
4404
4405
4406
static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
if (ptr && !sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
/* Ignore urgent data that we've already seen and read. */
if (after(tp->copied_seq, ptr))
return;
/* Do not replay urg ptr.
*
* NOTE: interesting situation not covered by specs.
* Misbehaving sender may send urg ptr, pointing to segment,
* which we already have in ofo queue. We are not able to fetch
* such data and will stay in TCP_URG_NOTYET until will be eaten
* by recvmsg(). Seems, we are not obliged to handle such wicked
* situations. But it is worth to think about possibility of some
* DoSes using some hypothetical application level deadlock.
*/
if (before(ptr, tp->rcv_nxt))
return;
/* Do we already have a newer (or duplicate) urgent pointer? */
if (tp->urg_data && !after(ptr, tp->urg_seq))
return;
/* Tell the world about our new urgent pointer. */
sk_send_sigurg(sk);
/* We may be adding urgent data when the last byte read was
* urgent. To do this requires some care. We cannot just ignore
* tp->copied_seq since we would read the last urgent byte again
* as data, nor can we alter copied_seq until this data arrives
* or we break the semantics of SIOCATMARK (and thus sockatmark())
*
* NOTE. Double Dutch. Rendering to plain English: author of comment
* above did something sort of send("A", MSG_OOB); send("B", MSG_OOB);
* and expect that both A and B disappear from stream. This is _wrong_.
* Though this happens in BSD with high probability, this is occasional.
* Any application relying on this is buggy. Note also, that fix "works"
* only in this artificial test. Insert some normal data between A and B and we will
* decline of BSD again. Verdict: it is better to remove to trap
* buggy users.
*/
if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
!sock_flag(sk, SOCK_URGINLINE) &&
tp->copied_seq != tp->rcv_nxt) {
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
tp->copied_seq++;
if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435
4436
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
__kfree_skb(skb);
}
}
tp->urg_data = TCP_URG_NOTYET;
tp->urg_seq = ptr;
/* Disable header prediction. */
tp->pred_flags = 0;
}
/* This is the 'fast' part of urgent handling. */
static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
{
	struct tcp_sock *tp = tcp_sk(sk);
	u32 urg_off;
	u8 urg_byte;

	/* A new urgent pointer may be announced - normally it is not. */
	if (th->urg)
		tcp_check_urg(sk,th);

	/* Nothing more to do unless urgent data is still awaited. */
	if (tp->urg_data != TCP_URG_NOTYET)
		return;

	/* Offset of the urgent byte from the start of skb data. */
	urg_off = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
		  th->syn;

	/* The urgent pointer does not point into this packet. */
	if (urg_off >= skb->len)
		return;

	if (skb_copy_bits(skb, urg_off, &urg_byte, 1))
		BUG();
	tp->urg_data = TCP_URG_VALID | urg_byte;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, 0);
}
/* Copy the payload of @skb (everything after the first @hlen bytes) to
 * the iovec in tp->ucopy, with bottom halves enabled for the duration
 * of the copy.  Returns the error code of the copy helper; on success
 * (zero) the ucopy length and copied_seq are advanced by the payload
 * size.
 */
static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int payload = skb->len - hlen;
	int rc;

	local_bh_enable();

	/* Pick the copy helper depending on whether the checksum still
	 * has to be computed.
	 */
	rc = skb_csum_unnecessary(skb) ?
		skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, payload) :
		skb_copy_and_csum_datagram_iovec(skb, hlen, tp->ucopy.iov);

	if (rc == 0) {
		tp->ucopy.len -= payload;
		tp->copied_seq += payload;
		tcp_rcv_space_adjust(sk);
	}

	local_bh_disable();
	return rc;
}
/* Complete the checksum of @skb and return the result of
 * __tcp_checksum_complete().  When the socket is owned by a user
 * context, bottom halves are enabled around the computation.
 */
static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
{
	__sum16 result;

	if (sock_owned_by_user(sk)) {
		local_bh_enable();
		result = __tcp_checksum_complete(skb);
		local_bh_disable();
	} else {
		result = __tcp_checksum_complete(skb);
	}
	return result;
}
/* Returns non-zero when @skb still needed checksumming and the completed
 * checksum is non-zero; callers in this file treat that as a checksum
 * error.  Skbs whose checksum is already known good are not re-checked.
 */
static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
{
	return !skb_csum_unnecessary(skb) &&
		__tcp_checksum_complete_user(sk, skb);
}
#ifdef CONFIG_NET_DMA
/* Try to start an early DMA copy of the payload of @skb (everything
 * after the first @hlen bytes) into the iovec in tp->ucopy.
 * Returns 1 when the copy was handed to the DMA channel, 0 otherwise.
 */
static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int chunk = skb->len - hlen;
	int dma_cookie;
	int copied_early = 0;

	/* A wakeup is already pending: do not attempt an early copy. */
	if (tp->ucopy.wakeup)
		return 0;

	if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
		tp->ucopy.dma_chan = get_softnet_dma();

	if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {

		dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
			skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);

		if (dma_cookie < 0)
			goto out;

		tp->ucopy.dma_cookie = dma_cookie;
		copied_early = 1;

		tp->ucopy.len -= chunk;
		tp->copied_seq += chunk;
		tcp_rcv_space_adjust(sk);

		/* Wake the reader when its buffer is full, the segment
		 * carries PSH, or more than half of rcvbuf is in use.
		 */
		if ((tp->ucopy.len == 0) ||
		    (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
		    (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
			tp->ucopy.wakeup = 1;
			sk->sk_data_ready(sk, 0);
		}
	} else if (chunk > 0) {
		/* No early copy possible but there is data: wake the reader. */
		tp->ucopy.wakeup = 1;
		sk->sk_data_ready(sk, 0);
	}
out:
	return copied_early;
}
#endif /* CONFIG_NET_DMA */
/*
 * TCP receive function for the ESTABLISHED state.
* It is split into a fast path and a slow path. The fast path is
* disabled when:
* - A zero window was announced from us - zero window probing
* is only handled properly in the slow path.
* - Out of order segments arrived.
* - Urgent data is expected.
* - There is no buffer space left
* - Unexpected TCP flags/window values/header lengths are received
* (detected by checking the TCP header against pred_flags)
* - Data is sent in both directions. Fast path only supports pure senders
* or pure receivers (this means either the sequence number or the ack
* value must stay constant)
* - Unexpected TCP option.
*
* When these conditions are not satisfied it drops into a standard
* receive procedure patterned after RFC793 to handle all cases.
* The first three cases are guaranteed by proper pred_flags setting,
* the rest is checked inline. Fast processing is turned on in
* tcp_data_queue when everything is OK.
*/
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
/*
* Header prediction.
* The code loosely follows the one in the famous
*
* Van's trick is to deposit buffers into socket queue
* on a device interrupt, to call tcp_recv function
* on the receive process context and checksum and copy
* the buffer to user space. smart...
*
* Our current scheme is not silly either but we take the
* extra cost of the net_bh soft interrupt processing...
* We do checksum and copy also but from device to kernel.
*/
tp->rx_opt.saw_tstamp = 0;
/* pred_flags is 0xS?10 << 16 + snd_wnd
* 'S' will always be tp->tcp_header_len >> 2
* '?' will be 0 for the fast path, otherwise pred_flags is 0 to
* turn it off (when there are holes in the receive
* space for instance)
* PSH flag is ignored.
*/
if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
int tcp_header_len = tp->tcp_header_len;
/* Timestamp header prediction: tcp_header_len
* is automatically equal to th->doff*4 due to pred_flags
* match.
*/
/* Check timestamp */
if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
__be32 *ptr = (__be32 *)(th + 1);
if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
goto slow_path;
tp->rx_opt.saw_tstamp = 1;
4623
4624
4625
4626
4627
4628
4629
4630
4631
4632
4633
4634
4635
4636
4637
4638
4639
4640
4641
4642
4643
4644
4645
4646
4647
4648
4649
4650
4651
4652
4653
tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr;
tp->rx_opt.rcv_tsecr = ntohl(*ptr);
/* If PAWS failed, check it more carefully in slow path */
if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
goto slow_path;
/* DO NOT update ts_recent here, if checksum fails
* and timestamp was corrupted part, it will result
* in a hung connection since we will drop all
* future packets due to the PAWS test.
*/
}
if (len <= tcp_header_len) {
/* Bulk data transfer: sender */
if (len == tcp_header_len) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
/* We know that such packets are checksummed
* on entry.
*/
tcp_ack(sk, skb, 0);
tcp_data_snd_check(sk);
return 0;
} else { /* Header too small */
TCP_INC_STATS_BH(TCP_MIB_INERRS);
goto discard;
}
} else {
int eaten = 0;
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
#endif
if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
__set_current_state(TASK_RUNNING);
if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
eaten = 1;
}
if (eaten) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) +
TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(sk, skb);
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
}
if (copied_early)
tcp_cleanup_rbuf(sk, skb->len);
}
if (!eaten) {
if (tcp_checksum_complete_user(sk, skb))
goto csum_error;
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(sk, skb);
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
__skb_pull(skb,tcp_header_len);
__skb_queue_tail(&sk->sk_receive_queue, skb);
sk_stream_set_owner_r(skb, sk);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
}
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
/* Well, only one small jumplet in fast path... */
tcp_ack(sk, skb, FLAG_DATA);
tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
__tcp_ack_snd_check(sk, 0);
#ifdef CONFIG_NET_DMA
if (copied_early)
__skb_queue_tail(&sk->sk_async_wait_queue, skb);
else
#endif
if (eaten)
__kfree_skb(skb);
else
sk->sk_data_ready(sk, 0);
return 0;
}
}
slow_path:
if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
goto csum_error;
/*
* RFC1323: H1. Apply PAWS check first.
*/
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
4760
4761
4762
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
if (!th->rst) {
NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
tcp_send_dupack(sk, skb);
goto discard;
}
/* Resets are accepted even if PAWS failed.
ts_recent update must be made after we are sure
that the packet is in window.
*/
}
/*
* Standard slow path.
*/
if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
* an acknowledgment should be sent in reply (unless the RST bit
* is set, if so drop the segment and return)".
*/
if (!th->rst)
tcp_send_dupack(sk, skb);
goto discard;
}
tcp_reset(sk);
goto discard;
}
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
TCP_INC_STATS_BH(TCP_MIB_INERRS);
NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
tcp_reset(sk);
return 1;
}
step5:
tcp_rcv_rtt_measure_ts(sk, skb);
/* Process urgent data. */
tcp_urg(sk, skb, th);
/* step 7: process the segment text */
tcp_data_queue(sk, skb);
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
return 0;
csum_error:
TCP_INC_STATS_BH(TCP_MIB_INERRS);
discard:
__kfree_skb(skb);
return 0;
}
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
4831
4832
4833
4834
4835
4836
4837
4838
4839
4840
4841
4842
4843
4844
4845
4846
4847
4848
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861
4862
4863
4864
4865
4866
4867
4868
4869
4870
4871
4872
4873
4874
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885
4886
4887
4888
4889
4890
4891
4892
4893
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, 0);
if (th->ack) {
/* rfc793:
* "If the state is SYN-SENT then
* first check the ACK bit
* If the ACK bit is set
* If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
* a reset (unless the RST bit is set, if so drop
* the segment and return)"
*
* We do not send data with SYN, so that RFC-correct
* test reduces to:
*/
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
goto reset_and_undo;
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
/* Now ACK is acceptable.
*
* "If the RST bit is set
* If the ACK was acceptable then signal the user "error:
* connection reset", drop the segment, enter CLOSED state,
* delete TCB, and return."
*/
if (th->rst) {
tcp_reset(sk);
goto discard;
}
/* rfc793:
* "fifth, if neither of the SYN or RST bits is set then
* drop the segment and return."
*
* See note below!
* --ANK(990513)
*/
if (!th->syn)
goto discard_and_undo;
/* rfc793:
* "If the SYN bit is on ...
* are acceptable then ...
* (our SYN has been ACKed), change the connection
* state to ESTABLISHED..."
*/
TCP_ECN_rcv_synack(tp, th);
tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
tcp_ack(sk, skb, FLAG_SLOWPATH);
/* Ok.. it's good. Set up sequence numbers and
* move to established.
*/
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
* never scaled.
*/
tp->snd_wnd = ntohs(th->window);
tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
if (!tp->rx_opt.wscale_ok) {
tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
tp->window_clamp = min(tp->window_clamp, 65535U);
}
if (tp->rx_opt.saw_tstamp) {
tp->rx_opt.tstamp_ok = 1;
tp->tcp_header_len =
sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
tcp_store_ts_recent(tp);
} else {
tp->tcp_header_len = sizeof(struct tcphdr);
}
if (tcp_is_sack(tp) && sysctl_tcp_fack)
tcp_enable_fack(tp);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
tp->copied_seq = tp->rcv_nxt;
security_inet_conn_established(sk, skb);
/* Make sure socket is routed, for correct metrics. */

Arnaldo Carvalho de Melo
committed
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_congestion_control(sk);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_time_stamp;
tcp_init_buffer_space(sk);
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
if (!tp->rx_opt.snd_wscale)
__tcp_fast_path_on(tp, tp->snd_wnd);
else
tp->pred_flags = 0;
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);

Arnaldo Carvalho de Melo
committed
if (sk->sk_write_pending ||
icsk->icsk_accept_queue.rskq_defer_accept ||
icsk->icsk_ack.pingpong) {
/* Save one ACK. Data will be ready after
* several ticks, if write_pending is set.
*
* It may be deleted, but with this feature tcpdumps
* look so _wonderfully_ clever, that I was not able
* to stand against the temptation 8) --ANK
*/
inet_csk_schedule_ack(sk);

Arnaldo Carvalho de Melo
committed
icsk->icsk_ack.lrcvtime = tcp_time_stamp;
icsk->icsk_ack.ato = TCP_ATO_MIN;
tcp_incr_quickack(sk);
tcp_enter_quickack_mode(sk);

Arnaldo Carvalho de Melo
committed
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
discard:
__kfree_skb(skb);
return 0;
} else {
tcp_send_ack(sk);
}
return -1;
}
/* No ACK in the segment */
if (th->rst) {
/* rfc793:
* "If the RST bit is set
*
* Otherwise (no ACK) drop the segment and return."
*/
goto discard_and_undo;
}