Newer
Older
tcp_try_rmem_schedule(sk, skb->truesize))
goto drop;
skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
tcp_event_data_recv(sk, skb);
if (!skb_queue_empty(&tp->out_of_order_queue)) {
tcp_ofo_queue(sk);
/* RFC2581. 4.2. SHOULD send immediate ACK, when
* gap in queue is filled.
*/
if (skb_queue_empty(&tp->out_of_order_queue))
inet_csk(sk)->icsk_ack.pingpong = 0;
}
if (tp->rx_opt.num_sacks)
tcp_sack_remove(tp);
tcp_fast_path_check(sk);
if (eaten > 0)
__kfree_skb(skb);
else if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
return;
}
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
/* A retransmit, 2nd most common case. Force an immediate ack. */
NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST);
tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
out_of_window:
tcp_enter_quickack_mode(sk);
inet_csk_schedule_ack(sk);
drop:
__kfree_skb(skb);
return;
}
/* Out of window. F.e. zero window probe. */
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp)))
goto out_of_window;
tcp_enter_quickack_mode(sk);
if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
/* Partial packet, seq < rcv_next < end_seq */
SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq);
tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, tp->rcv_nxt);
/* If window is closed, drop tail of packet. But after
* remembering D-SACK for its head made in previous line.
*/
if (!tcp_receive_window(tp))
goto out_of_window;
goto queue_and_out;
}
TCP_ECN_check_ce(tp, skb);
if (tcp_try_rmem_schedule(sk, skb->truesize))
goto drop;
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
skb_set_owner_r(skb, sk);
if (!skb_peek(&tp->out_of_order_queue)) {
/* Initial out of order segment, build 1 SACK. */
if (tcp_is_sack(tp)) {
tp->rx_opt.num_sacks = 1;
tp->rx_opt.dsack = 0;
tp->rx_opt.eff_sacks = 1;
tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
tp->selective_acks[0].end_seq =
TCP_SKB_CB(skb)->end_seq;
}
__skb_queue_head(&tp->out_of_order_queue, skb);
} else {
struct sk_buff *skb1 = tp->out_of_order_queue.prev;
u32 seq = TCP_SKB_CB(skb)->seq;
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (seq == TCP_SKB_CB(skb1)->end_seq) {
__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
if (!tp->rx_opt.num_sacks ||
tp->selective_acks[0].end_seq != seq)
goto add_sack;
/* Common case: data arrive in order after hole. */
tp->selective_acks[0].end_seq = end_seq;
return;
}
/* Find place to insert this segment. */
do {
if (!after(TCP_SKB_CB(skb1)->seq, seq))
break;
} while ((skb1 = skb1->prev) !=
(struct sk_buff *)&tp->out_of_order_queue);
if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
before(seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
__kfree_skb(skb);
tcp_dsack_set(tp, seq, end_seq);
goto add_sack;
}
if (after(seq, TCP_SKB_CB(skb1)->seq)) {
/* Partial overlap. */
tcp_dsack_set(tp, seq,
TCP_SKB_CB(skb1)->end_seq);
} else {
skb1 = skb1->prev;
}
}
__skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue);
/* And clean segments covered by new one as whole. */
while ((skb1 = skb->next) !=
(struct sk_buff *)&tp->out_of_order_queue &&
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
end_seq);
break;
}
__skb_unlink(skb1, &tp->out_of_order_queue);
tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
TCP_SKB_CB(skb1)->end_seq);
__kfree_skb(skb1);
if (tcp_is_sack(tp))
tcp_sack_new_ofo_skb(sk, seq, end_seq);
}
}
/* Collapse contiguous sequence of skbs head..tail with
* sequence numbers start..end.
* Segments with FIN/SYN are not collapsed (only because this
* simplifies code)
*/
static void
tcp_collapse(struct sock *sk, struct sk_buff_head *list,
struct sk_buff *head, struct sk_buff *tail,
u32 start, u32 end)
/* First, check that queue is collapsible and find
for (skb = head; skb != tail;) {
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
struct sk_buff *next = skb->next;
__kfree_skb(skb);
NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
continue;
}
/* The first skb to collapse is:
* - not SYN/FIN and
* - bloated or contains data before "start" or
* overlaps to the next one.
*/
if (!tcp_hdr(skb)->syn && !tcp_hdr(skb)->fin &&
(tcp_win_from_space(skb->truesize) > skb->len ||
before(TCP_SKB_CB(skb)->seq, start) ||
(skb->next != tail &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb->next)->seq)))
break;
/* Decided to skip this, advance start seq. */
start = TCP_SKB_CB(skb)->end_seq;
skb = skb->next;
}
if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin)
return;
while (before(start, end)) {
struct sk_buff *nskb;
unsigned int header = skb_headroom(skb);
int copy = SKB_MAX_ORDER(header, 0);
/* Too big header? This can happen with IPv6. */
if (copy < 0)
return;
if (end - start < copy)
copy = end - start;
nskb = alloc_skb(copy + header, GFP_ATOMIC);

Arnaldo Carvalho de Melo
committed
skb_set_mac_header(nskb, skb_mac_header(skb) - skb->head);
skb_set_network_header(nskb, (skb_network_header(skb) -
skb->head));
skb_set_transport_header(nskb, (skb_transport_header(skb) -
skb->head));
skb_reserve(nskb, header);
memcpy(nskb->head, skb->head, header);
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
skb_set_owner_r(nskb, sk);
/* Copy data, releasing collapsed skbs. */
while (copy > 0) {
int offset = start - TCP_SKB_CB(skb)->seq;
int size = TCP_SKB_CB(skb)->end_seq - start;
BUG_ON(offset < 0);
if (size > 0) {
size = min(copy, size);
if (skb_copy_bits(skb, offset, skb_put(nskb, size), size))
BUG();
TCP_SKB_CB(nskb)->end_seq += size;
copy -= size;
start += size;
}
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
struct sk_buff *next = skb->next;
__kfree_skb(skb);
NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED);
skb = next;
if (skb == tail ||
tcp_hdr(skb)->syn ||
tcp_hdr(skb)->fin)
4252
4253
4254
4255
4256
4257
4258
4259
4260
4261
4262
4263
4264
4265
4266
4267
4268
4269
4270
4271
4272
4273
4274
4275
4276
4277
4278
4279
4280
4281
4282
return;
}
}
}
}
/* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
* and tcp_collapse() them until all the queue is collapsed.
*/
static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
struct sk_buff *head;
u32 start, end;
if (skb == NULL)
return;
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
head = skb;
for (;;) {
skb = skb->next;
/* Segment is terminated when we see gap or when
* we are at the end of all the queue. */
if (skb == (struct sk_buff *)&tp->out_of_order_queue ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
tcp_collapse(sk, &tp->out_of_order_queue,
head, skb, start, end);
head = skb;
if (skb == (struct sk_buff *)&tp->out_of_order_queue)
break;
/* Start new segment */
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
} else {
if (before(TCP_SKB_CB(skb)->seq, start))
start = TCP_SKB_CB(skb)->seq;
if (after(TCP_SKB_CB(skb)->end_seq, end))
end = TCP_SKB_CB(skb)->end_seq;
}
}
}
/*
* Purge the out-of-order queue.
* Return true if queue was pruned.
static int tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
int res = 0;
if (!skb_queue_empty(&tp->out_of_order_queue)) {
NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
__skb_queue_purge(&tp->out_of_order_queue);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
* is in a sad state like this, we care only about integrity
* of the connection not performance.
*/
if (tp->rx_opt.sack_ok)
tcp_sack_reset(&tp->rx_opt);
sk_mem_reclaim(sk);
return res;
/* Reduce allocated memory if we can, trying to get
* the socket within its memory limits again.
*
* Return less than zero if we should start dropping frames
* until the socket owning process reads some of the data
* to stabilize the situation.
*/
static int tcp_prune_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);
NET_INC_STATS_BH(LINUX_MIB_PRUNECALLED);
if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
tcp_clamp_window(sk);
else if (tcp_memory_pressure)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
tcp_collapse_ofo_queue(sk);
tcp_collapse(sk, &sk->sk_receive_queue,
sk->sk_receive_queue.next,
(struct sk_buff *)&sk->sk_receive_queue,
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
/* Collapsing did not help, destructive actions follow.
* This must not ever occur. */
tcp_prune_ofo_queue(sk);
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376
4377
4378
4379
4380
4381
4382
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
/* If we are really being abused, tell the caller to silently
* drop receive data on the floor. It will get retransmitted
* and hopefully then we'll have sufficient space.
*/
NET_INC_STATS_BH(LINUX_MIB_RCVPRUNED);
/* Massive buffer overcommit. */
tp->pred_flags = 0;
return -1;
}
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
* As additional protections, we do not touch cwnd in retransmission phases,
* and if application hit its sndbuf limit recently.
*/
void tcp_cwnd_application_limited(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open &&
sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
/* Limited by application or receiver window. */
u32 init_win = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 win_used = max(tp->snd_cwnd_used, init_win);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1;
}
tp->snd_cwnd_used = 0;
}
tp->snd_cwnd_stamp = tcp_time_stamp;
}
static int tcp_should_expand_sndbuf(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
/* If the user specified a specific send buffer setting, do
* not modify it.
*/
if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
return 0;
/* If we are under global TCP memory pressure, do not expand. */
if (tcp_memory_pressure)
return 0;
/* If we are under soft global TCP memory pressure, do not expand. */
if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
return 0;
/* If we filled the congestion window, do not expand. */
if (tp->packets_out >= tp->snd_cwnd)
return 0;
return 1;
}
/* When incoming ACK allowed to free some skb from write_queue,
* we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
* on the exit from tcp input handler.
*
* PROBLEM: sndbuf expansion does not work well with largesend.
*/
static void tcp_new_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tcp_should_expand_sndbuf(sk)) {
int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
sndmem *= 2 * demanded;
if (sndmem > sk->sk_sndbuf)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
tp->snd_cwnd_stamp = tcp_time_stamp;
}
sk->sk_write_space(sk);
}
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
if (sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
tcp_new_space(sk);
}
}
static inline void tcp_data_snd_check(struct sock *sk)
tcp_push_pending_frames(sk);
tcp_check_space(sk);
}
/*
* Check if sending an ack is needed.
*/
static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
{
struct tcp_sock *tp = tcp_sk(sk);
/* More than one full frame received... */
if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
/* ... and right edge of window advances far enough.
* (tcp_recvmsg() will send ACK otherwise). Or...
*/
&& __tcp_select_window(sk) >= tp->rcv_wnd) ||
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
(ofo_possible && skb_peek(&tp->out_of_order_queue))) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
/* Else, send delayed ack. */
tcp_send_delayed_ack(sk);
}
}
static inline void tcp_ack_snd_check(struct sock *sk)
if (!inet_csk_ack_scheduled(sk)) {
/* We sent a data segment already. */
return;
}
__tcp_ack_snd_check(sk, 1);
}
/*
* This routine is only called when we have urgent data
* signaled. Its the 'slow' part of tcp_urg. It could be
* moved inline now as tcp_urg is only called from one
* place. We handle URGent data wrong. We have to - as
* BSD still doesn't use the correction from RFC961.
* For 1003.1g we should support a new option TCP_STDURG to permit
* either form (or just set the sysctl tcp_stdurg).
*/
static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
4507
4508
4509
4510
4511
4512
4513
4514
4515
4516
4517
4518
4519
4520
4521
4522
4523
4524
4525
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541
4542
{
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
if (ptr && !sysctl_tcp_stdurg)
ptr--;
ptr += ntohl(th->seq);
/* Ignore urgent data that we've already seen and read. */
if (after(tp->copied_seq, ptr))
return;
/* Do not replay urg ptr.
*
* NOTE: interesting situation not covered by specs.
* Misbehaving sender may send urg ptr, pointing to segment,
* which we already have in ofo queue. We are not able to fetch
* such data and will stay in TCP_URG_NOTYET until will be eaten
* by recvmsg(). Seems, we are not obliged to handle such wicked
* situations. But it is worth to think about possibility of some
* DoSes using some hypothetical application level deadlock.
*/
if (before(ptr, tp->rcv_nxt))
return;
/* Do we already have a newer (or duplicate) urgent pointer? */
if (tp->urg_data && !after(ptr, tp->urg_seq))
return;
/* Tell the world about our new urgent pointer. */
sk_send_sigurg(sk);
/* We may be adding urgent data when the last byte read was
* urgent. To do this requires some care. We cannot just ignore
* tp->copied_seq since we would read the last urgent byte again
* as data, nor can we alter copied_seq until this data arrives
* or we break the semantics of SIOCATMARK (and thus sockatmark())
*
* NOTE. Double Dutch. Rendering to plain English: author of comment
* above did something sort of send("A", MSG_OOB); send("B", MSG_OOB);
* and expect that both A and B disappear from stream. This is _wrong_.
* Though this happens in BSD with high probability, this is occasional.
* Any application relying on this is buggy. Note also, that fix "works"
* only in this artificial test. Insert some normal data between A and B and we will
* decline of BSD again. Verdict: it is better to remove to trap
* buggy users.
*/
if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
!sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
tp->copied_seq++;
if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
tp->urg_data = TCP_URG_NOTYET;
tp->urg_seq = ptr;
/* Disable header prediction. */
tp->pred_flags = 0;
}
/* This is the 'fast' part of urgent handling. */
static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Check if we get a new urgent pointer - normally not. */
if (th->urg)
/* Do we wait for any urgent data? - normally not... */
if (tp->urg_data == TCP_URG_NOTYET) {
u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff * 4) -
th->syn;
/* Is the urgent pointer pointing into this packet? */
if (ptr < skb->len) {
u8 tmp;
if (skb_copy_bits(skb, ptr, &tmp, 1))
BUG();
tp->urg_data = TCP_URG_VALID | tmp;
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_data_ready(sk, 0);
}
}
}
static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
{
struct tcp_sock *tp = tcp_sk(sk);
int chunk = skb->len - hlen;
int err;
local_bh_enable();
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, hlen, tp->ucopy.iov, chunk);
else
err = skb_copy_and_csum_datagram_iovec(skb, hlen,
tp->ucopy.iov);
if (!err) {
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
tcp_rcv_space_adjust(sk);
}
local_bh_disable();
return err;
}
static __sum16 __tcp_checksum_complete_user(struct sock *sk,
struct sk_buff *skb)
if (sock_owned_by_user(sk)) {
local_bh_enable();
result = __tcp_checksum_complete(skb);
local_bh_disable();
} else {
result = __tcp_checksum_complete(skb);
}
return result;
}
static inline int tcp_checksum_complete_user(struct sock *sk,
struct sk_buff *skb)
return !skb_csum_unnecessary(skb) &&
__tcp_checksum_complete_user(sk, skb);
static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
int hlen)
{
struct tcp_sock *tp = tcp_sk(sk);
int chunk = skb->len - hlen;
int dma_cookie;
int copied_early = 0;
if (tp->ucopy.wakeup)
if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
tp->ucopy.dma_chan = get_softnet_dma();
if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
skb, hlen,
tp->ucopy.iov, chunk,
tp->ucopy.pinned_list);
if (dma_cookie < 0)
goto out;
tp->ucopy.dma_cookie = dma_cookie;
copied_early = 1;
tp->ucopy.len -= chunk;
tp->copied_seq += chunk;
tcp_rcv_space_adjust(sk);
if ((tp->ucopy.len == 0) ||
(tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
(atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
tp->ucopy.wakeup = 1;
sk->sk_data_ready(sk, 0);
}
} else if (chunk > 0) {
tp->ucopy.wakeup = 1;
sk->sk_data_ready(sk, 0);
}
out:
return copied_early;
}
#endif /* CONFIG_NET_DMA */
* TCP receive function for the ESTABLISHED state.
* It is split into a fast path and a slow path. The fast path is
* disabled when:
* - A zero window was announced from us - zero window probing
* is only handled properly in the slow path.
* - Out of order segments arrived.
* - Urgent data is expected.
* - There is no buffer space left
* - Unexpected TCP flags/window values/header lengths are received
* (detected by checking the TCP header against pred_flags)
* - Data is sent in both directions. Fast path only supports pure senders
* or pure receivers (this means either the sequence number or the ack
* value must stay constant)
* - Unexpected TCP option.
*
* When these conditions are not satisfied it drops into a standard
* receive procedure patterned after RFC793 to handle all cases.
* The first three cases are guaranteed by proper pred_flags setting,
* the rest is checked inline. Fast processing is turned on in
* tcp_data_queue when everything is OK.
*/
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
/*
* Header prediction.
* The code loosely follows the one in the famous
*
* Van's trick is to deposit buffers into socket queue
* on a device interrupt, to call tcp_recv function
* on the receive process context and checksum and copy
* the buffer to user space. smart...
*
* Our current scheme is not silly either but we take the
* extra cost of the net_bh soft interrupt processing...
* We do checksum and copy also but from device to kernel.
*/
tp->rx_opt.saw_tstamp = 0;
/* pred_flags is 0xS?10 << 16 + snd_wnd
* 'S' will always be tp->tcp_header_len >> 2
* '?' will be 0 for the fast path, otherwise pred_flags is 0 to
* turn it off (when there are holes in the receive
* space for instance)
* PSH flag is ignored.
*/
if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
int tcp_header_len = tp->tcp_header_len;
/* Timestamp header prediction: tcp_header_len
* is automatically equal to th->doff*4 due to pred_flags
* match.
*/
/* Check timestamp */
if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
__be32 *ptr = (__be32 *)(th + 1);
if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
goto slow_path;
tp->rx_opt.saw_tstamp = 1;
4763
4764
4765
4766
4767
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789
4790
4791
4792
4793
tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr;
tp->rx_opt.rcv_tsecr = ntohl(*ptr);
/* If PAWS failed, check it more carefully in slow path */
if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
goto slow_path;
/* DO NOT update ts_recent here, if checksum fails
* and timestamp was corrupted part, it will result
* in a hung connection since we will drop all
* future packets due to the PAWS test.
*/
}
if (len <= tcp_header_len) {
/* Bulk data transfer: sender */
if (len == tcp_header_len) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
/* We know that such packets are checksummed
* on entry.
*/
tcp_ack(sk, skb, 0);
tcp_data_snd_check(sk);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}
} else {
int eaten = 0;
if (tp->copied_seq == tp->rcv_nxt &&
len - tcp_header_len <= tp->ucopy.len) {
#ifdef CONFIG_NET_DMA
if (tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
copied_early = 1;
eaten = 1;
}
#endif
if (tp->ucopy.task == current &&
sock_owned_by_user(sk) && !copied_early) {
if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
eaten = 1;
}
if (eaten) {
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) +
TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(sk, skb);
__skb_pull(skb, tcp_header_len);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITSTOUSER);
}
if (copied_early)
tcp_cleanup_rbuf(sk, skb->len);
}
if (!eaten) {
if (tcp_checksum_complete_user(sk, skb))
goto csum_error;
/* Predicted packet is in window by definition.
* seq == rcv_nxt and rcv_wup <= rcv_nxt.
* Hence, check seq<=rcv_wup reduces to:
*/
if (tcp_header_len ==
(sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
tp->rcv_nxt == tp->rcv_wup)
tcp_store_ts_recent(tp);
tcp_rcv_rtt_measure_ts(sk, skb);
if ((int)skb->truesize > sk->sk_forward_alloc)
goto step5;
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
__skb_pull(skb, tcp_header_len);
skb_set_owner_r(skb, sk);
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
/* Well, only one small jumplet in fast path... */
tcp_ack(sk, skb, FLAG_DATA);
tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
__tcp_ack_snd_check(sk, 0);
#ifdef CONFIG_NET_DMA
if (copied_early)
__skb_queue_tail(&sk->sk_async_wait_queue, skb);
else
#endif
if (eaten)
__kfree_skb(skb);
else
sk->sk_data_ready(sk, 0);
return 0;
}
}
slow_path:
if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
goto csum_error;
/*
* RFC1323: H1. Apply PAWS check first.
*/
if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
tcp_paws_discard(sk, skb)) {
4901
4902
4903
4904
4905
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915
4916
4917
4918
4919
4920
4921
4922
4923
4924
4925
4926
4927
4928
if (!th->rst) {
NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
tcp_send_dupack(sk, skb);
goto discard;
}
/* Resets are accepted even if PAWS failed.
ts_recent update must be made after we are sure
that the packet is in window.
*/
}
/*
* Standard slow path.
*/
if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
/* RFC793, page 37: "In all states except SYN-SENT, all reset
* (RST) segments are validated by checking their SEQ-fields."
* And page 69: "If an incoming segment is not acceptable,
* an acknowledgment should be sent in reply (unless the RST bit
* is set, if so drop the segment and return)".
*/
if (!th->rst)
tcp_send_dupack(sk, skb);
goto discard;
}
tcp_reset(sk);
goto discard;
}
tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
tcp_reset(sk);
return 1;
}
step5:
tcp_rcv_rtt_measure_ts(sk, skb);
/* Process urgent data. */
tcp_urg(sk, skb, th);
/* step 7: process the segment text */
tcp_data_queue(sk, skb);
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
return 0;
csum_error:
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
discard:
__kfree_skb(skb);
return 0;
}
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, unsigned len)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
4972
4973
4974
4975
4976
4977
4978
4979
4980
4981
4982
4983
4984
4985
4986
4987
4988
4989
4990
4991
4992
4993
4994
4995
4996
4997
4998
4999
5000
int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, 0);
if (th->ack) {
/* rfc793:
* "If the state is SYN-SENT then
* first check the ACK bit
* If the ACK bit is set
* If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
* a reset (unless the RST bit is set, if so drop
* the segment and return)"
*
* We do not send data with SYN, so that RFC-correct
* test reduces to:
*/
if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
goto reset_and_undo;
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
!between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
tcp_time_stamp)) {
NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
goto reset_and_undo;
}
/* Now ACK is acceptable.
*
* "If the RST bit is set