Skip to content
Snippets Groups Projects
tcp_ipv4.c 75.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • 	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
    		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
    
    		goto discard_and_relse;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
    		goto discard_and_relse;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (sk_filter(sk, skb))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto discard_and_relse;
    
    	skb->dev = NULL;
    
    
    	bh_lock_sock_nested(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	ret = 0;
    	if (!sock_owned_by_user(sk)) {
    
    #ifdef CONFIG_NET_DMA
    		struct tcp_sock *tp = tcp_sk(sk);
    		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
    
    			tp->ucopy.dma_chan = net_dma_find_channel();
    
    		if (tp->ucopy.dma_chan)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			ret = tcp_v4_do_rcv(sk, skb);
    
    		else
    #endif
    		{
    			if (!tcp_prequeue(sk, skb))
    
    Shan Wei's avatar
    Shan Wei committed
    				ret = tcp_v4_do_rcv(sk, skb);
    
    	} else if (unlikely(sk_add_backlog(sk, skb,
    					   sk->sk_rcvbuf + sk->sk_sndbuf))) {
    
    Zhu Yi's avatar
    Zhu Yi committed
    		bh_unlock_sock(sk);
    
    		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
    
    Zhu Yi's avatar
    Zhu Yi committed
    		goto discard_and_relse;
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	bh_unlock_sock(sk);
    
    	sock_put(sk);
    
    	return ret;
    
    no_tcp_socket:
    	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
    		goto discard_it;
    
    	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
    bad_packet:
    
    		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else {
    
    		tcp_v4_send_reset(NULL, skb);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    discard_it:
    	/* Discard frame. */
    	kfree_skb(skb);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    discard_and_relse:
    	sock_put(sk);
    	goto discard_it;
    
    do_time_wait:
    	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto discard_it;
    	}
    
    	if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) {
    
    		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto discard_it;
    	}
    
    	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	case TCP_TW_SYN: {
    
    		struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (sk2) {
    
    			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
    			inet_twsk_put(inet_twsk(sk));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			sk = sk2;
    			goto process;
    		}
    		/* Fall through to ACK */
    	}
    	case TCP_TW_ACK:
    		tcp_v4_timewait_ack(sk, skb);
    		break;
    	case TCP_TW_RST:
    		goto no_tcp_socket;
    	case TCP_TW_SUCCESS:;
    	}
    	goto discard_it;
    }
    
    
    /*
     * Time-wait socket operations used by TCP: the size of the per-socket
     * time-wait object plus the tcp_twsk_unique / tcp_twsk_destructor hooks.
     */
    static struct timewait_sock_ops tcp_timewait_sock_ops = {
    	.twsk_obj_size   = sizeof(struct tcp_timewait_sock),
    	.twsk_unique     = tcp_twsk_unique,
    	.twsk_destructor = tcp_twsk_destructor,
    };
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     * Remember the dst entry attached to @skb on socket @sk.
     *
     * A reference is taken on the dst, the pointer is stored in
     * sk->sk_rx_dst and the skb's incoming interface index is recorded in
     * inet_sk(sk)->rx_dst_ifindex.
     *
     * An skb without a dst is ignored: dst_hold() must not be handed a NULL
     * pointer, and there is nothing to cache in that case.
     */
    void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
    {
    	struct dst_entry *dst = skb_dst(skb);

    	if (!dst)
    		return;

    	dst_hold(dst);
    	sk->sk_rx_dst = dst;
    	inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
    }
    
    EXPORT_SYMBOL(inet_sk_rx_dst_set);
    
    /*
     * IPv4 address-family operations for connection-oriented sockets.
     * The compat_* handlers are only present when CONFIG_COMPAT is enabled.
     */
    const struct inet_connection_sock_af_ops ipv4_specific = {
    	.queue_xmit	   = ip_queue_xmit,
    	.send_check	   = tcp_v4_send_check,
    	.rebuild_header	   = inet_sk_rebuild_header,
    	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
    	.conn_request	   = tcp_v4_conn_request,
    	.syn_recv_sock	   = tcp_v4_syn_recv_sock,
    	.net_header_len	   = sizeof(struct iphdr),
    	.setsockopt	   = ip_setsockopt,
    	.getsockopt	   = ip_getsockopt,
    	.addr2sockaddr	   = inet_csk_addr2sockaddr,
    	.sockaddr_len	   = sizeof(struct sockaddr_in),
    	.bind_conflict	   = inet_csk_bind_conflict,
    #ifdef CONFIG_COMPAT
    	.compat_setsockopt = compat_ip_setsockopt,
    	.compat_getsockopt = compat_ip_getsockopt,
    #endif
    };
    
    EXPORT_SYMBOL(ipv4_specific);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    #ifdef CONFIG_TCP_MD5SIG
    
    /* IPv4 flavour of the TCP MD5 signature hooks (lookup, hash, key parsing). */
    static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
    	.md5_lookup    = tcp_v4_md5_lookup,
    	.calc_md5_hash = tcp_v4_md5_hash_skb,
    	.md5_parse     = tcp_v4_parse_md5_keys,
    };
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /* NOTE: A lot of things set to zero explicitly by call to
     *       sk_alloc() so need not be done here.
     */
    static int tcp_v4_init_sock(struct sock *sk)
    {
    
    	struct inet_connection_sock *icsk = inet_csk(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    #ifdef CONFIG_TCP_MD5SIG
    
    	tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
    
    void tcp_v4_destroy_sock(struct sock *sk)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	tcp_clear_xmit_timers(sk);
    
    
    	tcp_cleanup_congestion_control(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* Clean up the write buffer. */
    
    	tcp_write_queue_purge(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* Cleans up our, hopefully empty, out_of_order_queue. */
    
    	__skb_queue_purge(&tp->out_of_order_queue);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    #ifdef CONFIG_TCP_MD5SIG
    	/* Clean up the MD5 key list, if any */
    	if (tp->md5sig_info) {
    
    Eric Dumazet's avatar
    Eric Dumazet committed
    		tcp_clear_md5_list(sk);
    
    		kfree_rcu(tp->md5sig_info, rcu);
    
    #ifdef CONFIG_NET_DMA
    	/* Cleans up our sk_async_wait_queue */
    
    	__skb_queue_purge(&sk->sk_async_wait_queue);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* Clean prequeue, it must be empty really */
    	__skb_queue_purge(&tp->ucopy.prequeue);
    
    	/* Clean up a referenced TCP bind bucket. */
    
    	if (inet_csk(sk)->icsk_bind_hash)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/* TCP Cookie Transactions */
    	if (tp->cookie_values != NULL) {
    		kref_put(&tp->cookie_values->kref,
    			 tcp_cookie_values_release);
    		tp->cookie_values = NULL;
    	}
    
    	BUG_ON(tp->fastopen_rsk != NULL);
    
    	/* If socket is aborted during connect operation */
    	tcp_free_fastopen_req(tp);
    
    
    	sk_sockets_allocated_dec(sk);
    
    	sock_release_memcg(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    EXPORT_SYMBOL(tcp_v4_destroy_sock);
    
    #ifdef CONFIG_PROC_FS
    /* Proc filesystem TCP sock list dumping. */
    
    
    static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	return hlist_nulls_empty(head) ? NULL :
    
    		list_entry(head->first, struct inet_timewait_sock, tw_node);
    
    static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	return !is_a_nulls(tw->tw_node.next) ?
    		hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL;
    
    /*
     * Get next listener socket follow cur.  If cur is NULL, get first socket
     * starting from bucket given in st->bucket; when st->bucket is zero the
     * very first socket in the hash table is returned.
     */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    static void *listening_get_next(struct seq_file *seq, void *cur)
    {
    
    	struct inet_connection_sock *icsk;
    
    	struct hlist_nulls_node *node;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct sock *sk = cur;
    
    	struct inet_listen_hashbucket *ilb;
    
    	struct tcp_iter_state *st = seq->private;
    
    	struct net *net = seq_file_net(seq);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (!sk) {
    
    		ilb = &tcp_hashinfo.listening_hash[st->bucket];
    
    		spin_lock_bh(&ilb->lock);
    
    		sk = sk_nulls_head(&ilb->head);
    
    		st->offset = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto get_sk;
    	}
    
    	ilb = &tcp_hashinfo.listening_hash[st->bucket];
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	++st->num;
    
    	++st->offset;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (st->state == TCP_SEQ_STATE_OPENREQ) {
    
    		struct request_sock *req = cur;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		icsk = inet_csk(st->syn_wait_sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		req = req->dl_next;
    		while (1) {
    			while (req) {
    
    				if (req->rsk_ops->family == st->family) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    					cur = req;
    					goto out;
    				}
    				req = req->dl_next;
    			}
    
    			if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				break;
    get_req:
    
    			req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    
    		sk	  = sk_nulls_next(st->syn_wait_sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		st->state = TCP_SEQ_STATE_LISTENING;
    
    		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else {
    
    		icsk = inet_csk(sk);
    
    		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
    		if (reqsk_queue_len(&icsk->icsk_accept_queue))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			goto start_req;
    
    		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
    
    		sk = sk_nulls_next(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    get_sk:
    
    	sk_nulls_for_each_from(sk, node) {
    
    		if (!net_eq(sock_net(sk), net))
    			continue;
    		if (sk->sk_family == st->family) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			cur = sk;
    			goto out;
    		}
    
    		icsk = inet_csk(sk);
    
    		read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
    		if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    start_req:
    			st->uid		= sock_i_uid(sk);
    			st->syn_wait_sk = sk;
    			st->state	= TCP_SEQ_STATE_OPENREQ;
    			st->sbucket	= 0;
    			goto get_req;
    		}
    
    		read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	spin_unlock_bh(&ilb->lock);
    
    	st->offset = 0;
    
    	if (++st->bucket < INET_LHTABLE_SIZE) {
    
    		ilb = &tcp_hashinfo.listening_hash[st->bucket];
    		spin_lock_bh(&ilb->lock);
    
    		sk = sk_nulls_head(&ilb->head);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto get_sk;
    	}
    	cur = NULL;
    out:
    	return cur;
    }
    
    /*
     * Return the listening-table entry at index *pos, counted from bucket 0.
     *
     * *pos is decremented once per entry stepped over, so on a NULL return
     * (table exhausted) it holds the distance still to be covered by the
     * caller.
     */
    static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
    {
    	struct tcp_iter_state *st = seq->private;
    	void *rc;

    	st->bucket = 0;
    	st->offset = 0;
    	rc = listening_get_next(seq, NULL);

    	while (rc && *pos) {
    		rc = listening_get_next(seq, rc);
    		--*pos;
    	}
    	return rc;
    }
    
    
    Eric Dumazet's avatar
    Eric Dumazet committed
    static inline bool empty_bucket(struct tcp_iter_state *st)
    
    	return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
    		hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
    
    /*
     * Get first established socket starting from bucket given in st->bucket.
     * If st->bucket is zero, the very first socket in the hash is returned.
     */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    static void *established_get_first(struct seq_file *seq)
    {
    
    	struct tcp_iter_state *st = seq->private;
    
    	struct net *net = seq_file_net(seq);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	void *rc = NULL;
    
    
    	st->offset = 0;
    	for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		struct sock *sk;
    
    		struct hlist_nulls_node *node;
    
    		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		/* Lockless fast path for the common case of empty buckets */
    		if (empty_bucket(st))
    			continue;
    
    
    		spin_lock_bh(lock);
    
    		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
    
    			if (sk->sk_family != st->family ||
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				continue;
    			}
    			rc = sk;
    			goto out;
    		}
    		st->state = TCP_SEQ_STATE_TIME_WAIT;
    
    				   &tcp_hashinfo.ehash[st->bucket].twchain) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				continue;
    			}
    			rc = tw;
    			goto out;
    		}
    
    		spin_unlock_bh(lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		st->state = TCP_SEQ_STATE_ESTABLISHED;
    	}
    out:
    	return rc;
    }
    
    static void *established_get_next(struct seq_file *seq, void *cur)
    {
    	struct sock *sk = cur;
    
    	struct hlist_nulls_node *node;
    
    	struct tcp_iter_state *st = seq->private;
    
    	struct net *net = seq_file_net(seq);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	++st->num;
    
    	++st->offset;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (st->state == TCP_SEQ_STATE_TIME_WAIT) {
    		tw = cur;
    		tw = tw_next(tw);
    get_tw:
    
    		while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			tw = tw_next(tw);
    		}
    		if (tw) {
    			cur = tw;
    			goto out;
    		}
    
    		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		st->state = TCP_SEQ_STATE_ESTABLISHED;
    
    
    		/* Look for next non empty bucket */
    
    		st->offset = 0;
    
    		while (++st->bucket <= tcp_hashinfo.ehash_mask &&
    
    		if (st->bucket > tcp_hashinfo.ehash_mask)
    
    		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
    
    		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	sk_nulls_for_each_from(sk, node) {
    
    		if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			goto found;
    	}
    
    	st->state = TCP_SEQ_STATE_TIME_WAIT;
    
    	tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	goto get_tw;
    found:
    	cur = sk;
    out:
    	return cur;
    }
    
    /*
     * Return the entry at index @pos of the established/time-wait walk that
     * starts at ehash bucket 0, or NULL when fewer entries exist.
     */
    static void *established_get_idx(struct seq_file *seq, loff_t pos)
    {
    	struct tcp_iter_state *st = seq->private;
    	void *rc;

    	st->bucket = 0;
    	rc = established_get_first(seq);

    	while (rc && pos) {
    		rc = established_get_next(seq, rc);
    		--pos;
    	}
    	return rc;
    }
    
    /*
     * Return the entry at index @pos of the combined dump: the listening
     * table is walked first; if it ends before @pos is reached, the walk
     * continues in the established table with the remaining distance
     * (listening_get_idx() decrements pos as it goes).
     */
    static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
    {
    	void *rc;
    	struct tcp_iter_state *st = seq->private;

    	st->state = TCP_SEQ_STATE_LISTENING;
    	rc	  = listening_get_idx(seq, &pos);

    	if (!rc) {
    		st->state = TCP_SEQ_STATE_ESTABLISHED;
    		rc	  = established_get_idx(seq, pos);
    	}

    	return rc;
    }
    
    
    /*
     * Re-position the iterator where the previous read stopped, using the
     * bucket/offset/state saved in the iterator state instead of rescanning
     * from the start.
     *
     * Starting at the head of st->bucket, up to st->offset entries are
     * stepped over.  Stepping stops early once the walk has left the recorded
     * bucket: if entries disappeared from that bucket since the last read,
     * replaying the full offset would otherwise step over entries of the
     * following buckets that were never shown.
     *
     * st->num is restored before returning because the get_next helpers
     * increment it while stepping.
     *
     * Returns the entry to resume from, or NULL if nothing is left from the
     * recorded position onwards.
     */
    static void *tcp_seek_last_pos(struct seq_file *seq)
    {
    	struct tcp_iter_state *st = seq->private;
    	int bucket = st->bucket;
    	int offset = st->offset;
    	int orig_num = st->num;
    	void *rc = NULL;

    	switch (st->state) {
    	case TCP_SEQ_STATE_OPENREQ:
    	case TCP_SEQ_STATE_LISTENING:
    		if (st->bucket >= INET_LHTABLE_SIZE)
    			break;
    		st->state = TCP_SEQ_STATE_LISTENING;
    		rc = listening_get_next(seq, NULL);
    		while (offset-- && rc && bucket == st->bucket)
    			rc = listening_get_next(seq, rc);
    		if (rc)
    			break;
    		/*
    		 * Listening table exhausted: resume at the very first
    		 * established entry.  The saved offset described a position in
    		 * the listening table and must not be replayed here.
    		 */
    		st->bucket = 0;
    		offset = 0;
    		/* Fallthrough */
    	case TCP_SEQ_STATE_ESTABLISHED:
    	case TCP_SEQ_STATE_TIME_WAIT:
    		st->state = TCP_SEQ_STATE_ESTABLISHED;
    		if (st->bucket > tcp_hashinfo.ehash_mask)
    			break;
    		rc = established_get_first(seq);
    		while (offset-- && rc && bucket == st->bucket)
    			rc = established_get_next(seq, rc);
    	}

    	st->num = orig_num;

    	return rc;
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * seq_file ->start handler.
     *
     * When *pos equals the position recorded at the end of the previous
     * read, try to resume from the saved bucket/offset via
     * tcp_seek_last_pos().  Otherwise, or if that yields nothing, reset the
     * iterator and walk to *pos from the beginning.  Position 0 is the
     * header line and is represented by SEQ_START_TOKEN.
     */
    static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
    {
    	struct tcp_iter_state *st = seq->private;
    	void *rc;

    	if (*pos && *pos == st->last_pos) {
    		rc = tcp_seek_last_pos(seq);
    		if (rc)
    			goto out;
    	}

    	st->state = TCP_SEQ_STATE_LISTENING;
    	st->num = 0;
    	st->bucket = 0;
    	st->offset = 0;
    	rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;

    out:
    	st->last_pos = *pos;
    	return rc;
    }
    
    /*
     * seq_file ->next handler: advance from @v to the following entry.
     *
     * After the header token the first real entry is fetched.  When the
     * listening table runs out, iteration switches to the established table
     * starting at bucket 0.  *pos is incremented and mirrored into
     * st->last_pos on every call.
     */
    static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
    	struct tcp_iter_state *st = seq->private;
    	void *rc = NULL;

    	if (v == SEQ_START_TOKEN) {
    		rc = tcp_get_idx(seq, 0);
    		goto out;
    	}

    	switch (st->state) {
    	case TCP_SEQ_STATE_OPENREQ:
    	case TCP_SEQ_STATE_LISTENING:
    		rc = listening_get_next(seq, v);
    		if (!rc) {
    			st->state = TCP_SEQ_STATE_ESTABLISHED;
    			st->bucket = 0;
    			st->offset = 0;
    			rc	  = established_get_first(seq);
    		}
    		break;
    	case TCP_SEQ_STATE_ESTABLISHED:
    	case TCP_SEQ_STATE_TIME_WAIT:
    		rc = established_get_next(seq, v);
    		break;
    	}
    out:
    	++*pos;
    	st->last_pos = *pos;
    	return rc;
    }
    
    static void tcp_seq_stop(struct seq_file *seq, void *v)
    {
    
    	struct tcp_iter_state *st = seq->private;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	switch (st->state) {
    	case TCP_SEQ_STATE_OPENREQ:
    		if (v) {
    
    			struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
    			read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    	case TCP_SEQ_STATE_LISTENING:
    		if (v != SEQ_START_TOKEN)
    
    			spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		break;
    	case TCP_SEQ_STATE_TIME_WAIT:
    	case TCP_SEQ_STATE_ESTABLISHED:
    		if (v)
    
    			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
    
    int tcp_seq_open(struct inode *inode, struct file *file)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct tcp_seq_afinfo *afinfo = PDE(inode)->data;
    	struct tcp_iter_state *s;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	err = seq_open_net(inode, file, &afinfo->seq_ops,
    			  sizeof(struct tcp_iter_state));
    	if (err < 0)
    		return err;
    
    	s = ((struct seq_file *)file->private_data)->private;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	s->family		= afinfo->family;
    
    	s->last_pos 		= 0;
    
    int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	int rc = 0;
    	struct proc_dir_entry *p;
    
    
    	afinfo->seq_ops.start		= tcp_seq_start;
    	afinfo->seq_ops.next		= tcp_seq_next;
    	afinfo->seq_ops.stop		= tcp_seq_stop;
    
    
    	p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		rc = -ENOMEM;
    	return rc;
    }
    
    EXPORT_SYMBOL(tcp_proc_register);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	proc_net_remove(net, afinfo->name);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    EXPORT_SYMBOL(tcp_proc_unregister);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    static void get_openreq4(const struct sock *sk, const struct request_sock *req,
    
    			 struct seq_file *f, int i, kuid_t uid, int *len)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	const struct inet_request_sock *ireq = inet_rsk(req);
    
    	long delta = req->expires - jiffies;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
    
    		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n",
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		i,
    
    		ntohs(inet_sk(sk)->inet_sport),
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		TCP_SYN_RECV,
    		0, 0, /* could print option size, but that is af dependent. */
    		1,    /* timers active (only the expire timer) */
    
    		jiffies_delta_to_clock_t(delta),
    
    		req->num_timeout,
    
    		from_kuid_munged(seq_user_ns(f), uid),
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		0,  /* non standard timer */
    		0, /* open_requests have no inode */
    		atomic_read(&sk->sk_refcnt),
    
    static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	int timer_active;
    	unsigned long timer_expires;
    
    	const struct tcp_sock *tp = tcp_sk(sk);
    
    	const struct inet_connection_sock *icsk = inet_csk(sk);
    
    	const struct inet_sock *inet = inet_sk(sk);
    
    	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
    
    	__be32 dest = inet->inet_daddr;
    	__be32 src = inet->inet_rcv_saddr;
    	__u16 destp = ntohs(inet->inet_dport);
    	__u16 srcp = ntohs(inet->inet_sport);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		timer_active	= 1;
    
    		timer_expires	= icsk->icsk_timeout;
    	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		timer_active	= 4;
    
    		timer_expires	= icsk->icsk_timeout;
    
    	} else if (timer_pending(&sk->sk_timer)) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		timer_active	= 2;
    
    		timer_expires	= sk->sk_timer.expires;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else {
    		timer_active	= 0;
    		timer_expires = jiffies;
    	}
    
    
    	if (sk->sk_state == TCP_LISTEN)
    		rx_queue = sk->sk_ack_backlog;
    	else
    		/*
    		 * Because we don't lock the socket, we might find a transient negative value.
    		 */
    		rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
    
    
    	seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
    
    			"%08X %5d %8d %lu %d %pK %lu %lu %u %u %d%n",
    
    		i, src, srcp, dest, destp, sk->sk_state,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		timer_active,
    
    		jiffies_delta_to_clock_t(timer_expires - jiffies),
    
    		icsk->icsk_retransmits,
    
    		from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
    
    		sock_i_ino(sk),
    		atomic_read(&sk->sk_refcnt), sk,
    
    		jiffies_to_clock_t(icsk->icsk_rto),
    		jiffies_to_clock_t(icsk->icsk_ack.ato),
    
    		(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		tp->snd_cwnd,
    
    		sk->sk_state == TCP_LISTEN ?
    		    (fastopenq ? fastopenq->max_qlen : 0) :
    		    (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh),
    
    static void get_timewait4_sock(const struct inet_timewait_sock *tw,
    
    			       struct seq_file *f, int i, int *len)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	__be32 dest, src;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	__u16 destp, srcp;
    
    	long delta = tw->tw_ttd - jiffies;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	dest  = tw->tw_daddr;
    	src   = tw->tw_rcv_saddr;
    	destp = ntohs(tw->tw_dport);
    	srcp  = ntohs(tw->tw_sport);
    
    
    	seq_printf(f, "%4d: %08X:%04X %08X:%04X"
    
    		" %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n",
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
    
    		3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
    
    		atomic_read(&tw->tw_refcnt), tw, len);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    #define TMPSZ 150
    
    static int tcp4_seq_show(struct seq_file *seq, void *v)
    {
    
    	struct tcp_iter_state *st;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (v == SEQ_START_TOKEN) {
    		seq_printf(seq, "%-*s\n", TMPSZ - 1,
    			   "  sl  local_address rem_address   st tx_queue "
    			   "rx_queue tr tm->when retrnsmt   uid  timeout "
    			   "inode");
    		goto out;
    	}
    	st = seq->private;
    
    	switch (st->state) {
    	case TCP_SEQ_STATE_LISTENING:
    	case TCP_SEQ_STATE_ESTABLISHED:
    
    		get_tcp4_sock(v, seq, st->num, &len);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		break;
    	case TCP_SEQ_STATE_OPENREQ:
    
    		get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		break;
    	case TCP_SEQ_STATE_TIME_WAIT:
    
    		get_timewait4_sock(v, seq, st->num, &len);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		break;
    	}
    
    	seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    out:
    	return 0;
    }
    
    
    /* File operations for the TCP proc files: seq_file I/O on top of tcp_seq_open. */
    static const struct file_operations tcp_afinfo_seq_fops = {
    	.owner		= THIS_MODULE,
    	.open		= tcp_seq_open,
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    	.release	= seq_release_net,
    };
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    static struct tcp_seq_afinfo tcp4_seq_afinfo = {
    	.name		= "tcp",
    	.family		= AF_INET,
    
    	.seq_ops	= {
    		.show		= tcp4_seq_show,
    	},
    
    /* Per-namespace init: register the IPv4 TCP proc entry in @net. */
    static int __net_init tcp4_proc_init_net(struct net *net)
    {
    	int err = tcp_proc_register(net, &tcp4_seq_afinfo);

    	return err;
    }
    
    
    /* Per-namespace exit: remove the IPv4 TCP proc entry from @net. */
    static void __net_exit tcp4_proc_exit_net(struct net *net)
    {
    	struct tcp_seq_afinfo *afinfo = &tcp4_seq_afinfo;

    	tcp_proc_unregister(net, afinfo);
    }
    
    /* Per-namespace hooks that create and remove the IPv4 TCP proc entry. */
    static struct pernet_operations tcp4_net_ops = {
    	.init	= tcp4_proc_init_net,
    	.exit	= tcp4_proc_exit_net,
    };
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     * Register the per-namespace proc hooks for IPv4 TCP.
     * Returns the result of register_pernet_subsys().
     */
    int __init tcp4_proc_init(void)
    {
    	return register_pernet_subsys(&tcp4_net_ops);
    }
    
    /* Unregister the per-namespace proc hooks installed by tcp4_proc_init(). */
    void tcp4_proc_exit(void)
    {
    	unregister_pernet_subsys(&tcp4_net_ops);
    }
    #endif /* CONFIG_PROC_FS */
    
    
    Herbert Xu's avatar
    Herbert Xu committed
    struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
    {
    
    	const struct iphdr *iph = skb_gro_network_header(skb);
    
    	__wsum wsum;
    	__sum16 sum;
    
    Herbert Xu's avatar
    Herbert Xu committed
    
    	switch (skb->ip_summed) {
    	case CHECKSUM_COMPLETE:
    
    		if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
    
    Herbert Xu's avatar
    Herbert Xu committed
    				  skb->csum)) {
    			skb->ip_summed = CHECKSUM_UNNECESSARY;
    			break;
    		}
    
    Herbert Xu's avatar
    Herbert Xu committed
    		NAPI_GRO_CB(skb)->flush = 1;
    		return NULL;
    
    
    	case CHECKSUM_NONE:
    		wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
    					  skb_gro_len(skb), IPPROTO_TCP, 0);
    		sum = csum_fold(skb_checksum(skb,
    					     skb_gro_offset(skb),
    					     skb_gro_len(skb),
    					     wsum));
    		if (sum)
    			goto flush;
    
    		skb->ip_summed = CHECKSUM_UNNECESSARY;
    		break;
    
    Herbert Xu's avatar
    Herbert Xu committed
    	}
    
    	return tcp_gro_receive(head, skb);
    }
    
    int tcp4_gro_complete(struct sk_buff *skb)
    {
    
    	const struct iphdr *iph = ip_hdr(skb);
    
    Herbert Xu's avatar
    Herbert Xu committed
    	struct tcphdr *th = tcp_hdr(skb);
    
    	th->check = ~tcp_v4_check(skb->len - skb_transport_offset(skb),
    				  iph->saddr, iph->daddr, 0);
    	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
    
    	return tcp_gro_complete(skb);
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    struct proto tcp_prot = {
    	.name			= "TCP",
    	.owner			= THIS_MODULE,
    	.close			= tcp_close,
    	.connect		= tcp_v4_connect,
    	.disconnect		= tcp_disconnect,
    
    	.accept			= inet_csk_accept,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.ioctl			= tcp_ioctl,
    	.init			= tcp_v4_init_sock,
    	.destroy		= tcp_v4_destroy_sock,
    	.shutdown		= tcp_shutdown,
    	.setsockopt		= tcp_setsockopt,
    	.getsockopt		= tcp_getsockopt,
    	.recvmsg		= tcp_recvmsg,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.backlog_rcv		= tcp_v4_do_rcv,
    
    Eric Dumazet's avatar
    Eric Dumazet committed
    	.release_cb		= tcp_release_cb,
    
    	.mtu_reduced		= tcp_v4_mtu_reduced,
    
    	.hash			= inet_hash,
    	.unhash			= inet_unhash,
    	.get_port		= inet_csk_get_port,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.enter_memory_pressure	= tcp_enter_memory_pressure,
    	.sockets_allocated	= &tcp_sockets_allocated,
    
    	.orphan_count		= &tcp_orphan_count,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	.memory_allocated	= &tcp_memory_allocated,
    	.memory_pressure	= &tcp_memory_pressure,
    	.sysctl_wmem		= sysctl_tcp_wmem,
    	.sysctl_rmem		= sysctl_tcp_rmem,
    	.max_header		= MAX_TCP_HEADER,
    	.obj_size		= sizeof(struct tcp_sock),
    
    	.slab_flags		= SLAB_DESTROY_BY_RCU,
    
    	.twsk_prot		= &tcp_timewait_sock_ops,
    
    	.rsk_prot		= &tcp_request_sock_ops,
    
    	.h.hashinfo		= &tcp_hashinfo,
    
    #ifdef CONFIG_COMPAT
    	.compat_setsockopt	= compat_tcp_setsockopt,
    	.compat_getsockopt	= compat_tcp_getsockopt,
    #endif
    
    #ifdef CONFIG_MEMCG_KMEM
    
    	.init_cgroup		= tcp_init_cgroup,
    	.destroy_cgroup		= tcp_destroy_cgroup,
    	.proto_cgroup		= tcp_proto_cgroup,
    #endif
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    };
    
    EXPORT_SYMBOL(tcp_prot);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /* Per-namespace init hook for TCP; nothing to set up, always succeeds. */
    static int __net_init tcp_sk_init(struct net *net)
    {
    	return 0;
    }
    
    /* Per-namespace exit hook for TCP; intentionally empty. */
    static void __net_exit tcp_sk_exit(struct net *net)
    {
    }
    
    /*
     * Batched namespace exit hook: purge AF_INET time-wait sockets from the
     * TCP hash table.  The list of exiting namespaces is not inspected here.
     */
    static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
    {
    	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
    }
    
    /* Per-namespace lifecycle hooks for the IPv4 TCP socket layer. */
    static struct pernet_operations __net_initdata tcp_sk_ops = {
           .init	   = tcp_sk_init,
           .exit	   = tcp_sk_exit,
           .exit_batch = tcp_sk_exit_batch,
    };
    
    /*
     * Boot-time initialisation of IPv4 TCP: set up the TCP hash tables and
     * register the per-namespace hooks.  Registration failure is fatal.
     */
    void __init tcp_v4_init(void)
    {
    	inet_hashinfo_init(&tcp_hashinfo);
    	if (register_pernet_subsys(&tcp_sk_ops))
    		panic("Failed to create the TCP control socket.\n");
    }