/* net/ipv4/tcp_ipv4.c (excerpt) */

int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
    
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
    	}
    
    	md5sig = rcu_dereference_protected(tp->md5sig_info,
    					   sock_owned_by_user(sk));
    
	if (!md5sig) {
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

    	key = sock_kmalloc(sk, sizeof(*key), gfp);
    
    	if (!key)
    		return -ENOMEM;
	if (hlist_empty(&md5sig->head) && !tcp_alloc_md5sig_pool(sk)) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

    	memcpy(key->key, newkey, newkeylen);
    	key->keylen = newkeylen;
    	key->family = family;
    	memcpy(&key->addr, addr,
    	       (family == AF_INET6) ? sizeof(struct in6_addr) :
    				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
    EXPORT_SYMBOL(tcp_md5_do_add);
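
/* Editor's sketch, not part of tcp_ipv4.c: tcp_md5_do_lookup(), called
 * above, is the lockless reader these writers pair with. Simplified, and
 * assuming the four-argument hlist_for_each_entry_rcu() of this kernel
 * generation (cf. the five-argument hlist_for_each_entry_safe() used in
 * tcp_clear_md5_list() below):
 *
 *	struct tcp_md5sig_info *md5sig;
 *	struct tcp_md5sig_key *key;
 *	struct hlist_node *pos;
 *
 *	rcu_read_lock();
 *	md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
 *	if (md5sig)
 *		hlist_for_each_entry_rcu(key, pos, &md5sig->head, node)
 *			if (key->family == family &&
 *			    !memcmp(&key->addr, addr, sizeof(struct in_addr)))
 *				return key; // valid until rcu_read_unlock()
 *	rcu_read_unlock();
 *
 * This is why tcp_md5_do_add() publishes with rcu_assign_pointer() and
 * hlist_add_head_rcu(), and why removal defers the free with kfree_rcu():
 * a concurrent reader may still be walking the old list.
 */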
    
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
    	struct tcp_sock *tp = tcp_sk(sk);
    
	struct tcp_md5sig_key *key;
	struct tcp_md5sig_info *md5sig;
    
	key = tcp_md5_do_lookup(sk, addr, family);
    	if (!key)
    		return -ENOENT;
    	hlist_del_rcu(&key->node);
    
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
    
    	md5sig = rcu_dereference_protected(tp->md5sig_info,
    					   sock_owned_by_user(sk));
	if (hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();
	return 0;
}
    EXPORT_SYMBOL(tcp_md5_do_del);
    
static void tcp_clear_md5_list(struct sock *sk)
{
    	struct tcp_sock *tp = tcp_sk(sk);
    
    	struct tcp_md5sig_key *key;
    	struct hlist_node *pos, *n;
    
    	struct tcp_md5sig_info *md5sig;
    
    	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
    
	if (!hlist_empty(&md5sig->head))
		tcp_free_md5sig_pool();

	hlist_for_each_entry_safe(key, pos, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

    static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
    	struct tcp_md5sig cmd;
    	struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
    
    	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;
    
    	if (sin->sin_family != AF_INET)
		return -EINVAL;

	if (!cmd.tcpm_key || !cmd.tcpm_keylen)
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
				      AF_INET);
    
    
	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
			      AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
			      GFP_KERNEL);
}
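
/* Usage sketch (editor's addition, not kernel code): userspace reaches the
 * handler above through setsockopt(TCP_MD5SIG). Assuming the uapi
 * struct tcp_md5sig of this era (tcpm_addr, tcpm_keylen, tcpm_key):
 *
 *	struct tcp_md5sig md5;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&md5.tcpm_addr;
 *
 *	memset(&md5, 0, sizeof(md5));
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("192.0.2.1");	// peer to sign for
 *	md5.tcpm_keylen = 6;
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)) < 0)
 *		perror("TCP_MD5SIG");
 *
 * A zero tcpm_keylen takes the tcp_md5_do_del() branch above and removes
 * the key for that peer.
 */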
    static int tcp_v4_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					__be32 daddr, __be32 saddr, int nbytes)
{
	struct tcp4_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip4;

	/*
	 * 1. the TCP pseudo-header (in the order: source IP address,
	 * destination IP address, zero-padded protocol number, and
	 * segment length)
	 */
    	bp->saddr = saddr;
    	bp->daddr = daddr;
	bp->pad = 0;
	bp->protocol = IPPROTO_TCP;
	bp->len = cpu_to_be16(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
    	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
    }
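
/* Reference sketch (editor's note): struct tcp4_pseudohdr, filled in above,
 * mirrors the RFC 793 pseudo-header. Assuming this era's definition in
 * include/net/tcp.h:
 *
 *	struct tcp4_pseudohdr {
 *		__be32	saddr;
 *		__be32	daddr;
 *		__u8	pad;
 *		__u8	protocol;
 *		__be16	len;
 *	};
 *
 * All fields are in network byte order, so the 12 bytes fed to the hash by
 * sg_init_one()/crypto_hash_update() match what the receiver reconstructs
 * from the segment's IP header.
 */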
    
    
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th)
{
    	struct tcp_md5sig_pool *hp;
    	struct hash_desc *desc;
    
    	hp = tcp_get_md5sig_pool();
    	if (!hp)
    		goto clear_hash_noput;
    	desc = &hp->md5_desc;
    
    	if (crypto_hash_init(desc))
    		goto clear_hash;
    	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
    		goto clear_hash;
    	if (tcp_md5_hash_header(hp, th))
    		goto clear_hash;
    	if (tcp_md5_hash_key(hp, key))
    		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;
    
    	tcp_put_md5sig_pool();
    	return 0;
    
    clear_hash:
    	tcp_put_md5sig_pool();
    clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			const struct sock *sk, const struct request_sock *req,
			const struct sk_buff *skb)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	const struct tcphdr *th = tcp_hdr(skb);
	__be32 saddr, daddr;

	if (sk) {
		saddr = inet_sk(sk)->inet_saddr;
		daddr = inet_sk(sk)->inet_daddr;
    
    	} else if (req) {
    		saddr = inet_rsk(req)->loc_addr;
		daddr = inet_rsk(req)->rmt_addr;
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		saddr = iph->saddr;
		daddr = iph->daddr;
	}

    	hp = tcp_get_md5sig_pool();
    	if (!hp)
    		goto clear_hash_noput;
    	desc = &hp->md5_desc;
    
    	if (crypto_hash_init(desc))
    		goto clear_hash;
    
    	if (tcp_v4_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
    		goto clear_hash;
    	if (tcp_md5_hash_header(hp, th))
    		goto clear_hash;
    	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
    		goto clear_hash;
    	if (tcp_md5_hash_key(hp, key))
    		goto clear_hash;
    	if (crypto_hash_final(desc, md5_hash))
    		goto clear_hash;
    
    	tcp_put_md5sig_pool();
    	return 0;
    
    clear_hash:
    	tcp_put_md5sig_pool();
    clear_hash_noput:
    	memset(md5_hash, 0, 16);
	return 1;
}
    EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
    
static bool tcp_v4_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
{
    	/*
    	 * This gets called for each TCP segment that arrives
    	 * so we want to be efficient.
    	 * We have 3 drop cases:
    	 * o No MD5 hash and one expected.
    	 * o MD5 hash and we're not expecting one.
	 * o MD5 hash and it's wrong.
    	 */
    
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	unsigned char newhash[16];

	hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
					  AF_INET);
	hash_location = tcp_parse_md5sig_option(th);
    
    
    	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;
    
    
	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
    	}
    
	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
    	}
    
    	/* Okay, so this is hash_expected and hash_location -
    	 * so we need to calculate the checksum.
    	 */
    
	genhash = tcp_v4_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

    	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
    
    		net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
    				     &iph->saddr, ntohs(th->source),
    				     &iph->daddr, ntohs(th->dest),
    				     genhash ? " tcp_v4_calc_md5_hash failed"
    				     : "");
    
		return true;
	}
	return false;
}

struct request_sock_ops tcp_request_sock_ops __read_mostly = {
	.family		=	PF_INET,
	.obj_size	=	sizeof(struct tcp_request_sock),
	.rtx_syn_ack	=	tcp_v4_rtx_synack,
	.send_ack	=	tcp_v4_reqsk_send_ack,
	.destructor	=	tcp_v4_reqsk_destructor,
	.send_reset	=	tcp_v4_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

    #ifdef CONFIG_TCP_MD5SIG
    
static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
	.md5_lookup	=	tcp_v4_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
};
#endif

    static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
    			       struct request_sock *req,
    			       struct tcp_fastopen_cookie *foc,
    			       struct tcp_fastopen_cookie *valid_foc)
    {
    	bool skip_cookie = false;
    	struct fastopen_queue *fastopenq;
    
    	if (likely(!fastopen_cookie_present(foc))) {
    		/* See include/net/tcp.h for the meaning of these knobs */
    		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
    		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
    		    (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
    			skip_cookie = true; /* no cookie to validate */
    		else
    			return false;
    	}
    	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
    	/* A FO option is present; bump the counter. */
    	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
    
    	/* Make sure the listener has enabled fastopen, and we don't
    	 * exceed the max # of pending TFO requests allowed before trying
	 * to validate the cookie in order to avoid burning CPU cycles
    	 * unnecessarily.
    	 *
    	 * XXX (TFO) - The implication of checking the max_qlen before
    	 * processing a cookie request is that clients can't differentiate
    	 * between qlen overflow causing Fast Open to be disabled
    	 * temporarily vs a server not supporting Fast Open at all.
    	 */
    	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
    	    fastopenq == NULL || fastopenq->max_qlen == 0)
    		return false;
    
    	if (fastopenq->qlen >= fastopenq->max_qlen) {
    		struct request_sock *req1;
    		spin_lock(&fastopenq->lock);
    		req1 = fastopenq->rskq_rst_head;
    		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
    			spin_unlock(&fastopenq->lock);
    			NET_INC_STATS_BH(sock_net(sk),
    			    LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
    			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
    			foc->len = -1;
    			return false;
    		}
    		fastopenq->rskq_rst_head = req1->dl_next;
    		fastopenq->qlen--;
    		spin_unlock(&fastopenq->lock);
    		reqsk_free(req1);
    	}
    	if (skip_cookie) {
    		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
    		return true;
    	}
    	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
    		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
    			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
    			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
    			    memcmp(&foc->val[0], &valid_foc->val[0],
    			    TCP_FASTOPEN_COOKIE_SIZE) != 0)
    				return false;
    			valid_foc->len = -1;
    		}
    		/* Acknowledge the data received from the peer. */
    		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
    		return true;
    	} else if (foc->len == 0) { /* Client requesting a cookie */
    		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
    		NET_INC_STATS_BH(sock_net(sk),
    		    LINUX_MIB_TCPFASTOPENCOOKIEREQD);
    	} else {
    		/* Client sent a cookie with wrong size. Treat it
    		 * the same as invalid and return a valid one.
    		 */
    		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr, valid_foc);
    	}
    	return false;
    }
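
/* Usage sketch (editor's addition): the checks above gate on
 * sysctl_tcp_fastopen (TFO_SERVER_ENABLE is the 0x2 bit of
 * net.ipv4.tcp_fastopen) and on fastopenq->max_qlen, which a listener
 * sizes with the TCP_FASTOPEN socket option:
 *
 *	int qlen = 16;	// becomes fastopenq->max_qlen
 *
 *	setsockopt(listen_fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
 *	listen(listen_fd, backlog);
 *
 * With the sysctl bit clear or max_qlen == 0, the early bailout above
 * returns false and the SYN falls back to a regular three-way handshake.
 */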
    
    static int tcp_v4_conn_req_fastopen(struct sock *sk,
    				    struct sk_buff *skb,
    				    struct sk_buff *skb_synack,
    				    struct request_sock *req,
    				    struct request_values *rvp)
    {
    	struct tcp_sock *tp = tcp_sk(sk);
    	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
    	const struct inet_request_sock *ireq = inet_rsk(req);
	struct sock *child;
	int err;
    
    	req->num_retrans = 0;
    	req->num_timeout = 0;
    
    	req->sk = NULL;
    
    	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
    	if (child == NULL) {
    		NET_INC_STATS_BH(sock_net(sk),
    				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
    		kfree_skb(skb_synack);
    		return -1;
    	}
    
    	err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
    				    ireq->rmt_addr, ireq->opt);
    	err = net_xmit_eval(err);
    	if (!err)
    		tcp_rsk(req)->snt_synack = tcp_time_stamp;
    
    	/* XXX (TFO) - is it ok to ignore error and continue? */
    
    	spin_lock(&queue->fastopenq->lock);
    	queue->fastopenq->qlen++;
    	spin_unlock(&queue->fastopenq->lock);
    
    	/* Initialize the child socket. Have to fix some values to take
    	 * into account the child is a Fast Open socket and is created
    	 * only out of the bits carried in the SYN packet.
    	 */
    	tp = tcp_sk(child);
    
    	tp->fastopen_rsk = req;
	/* Do a hold on the listener sk so that if the listener is being
    	 * closed, the child that has been accepted can live on and still
    	 * access listen_lock.
    	 */
    	sock_hold(sk);
    	tcp_rsk(req)->listener = sk;
    
    	/* RFC1323: The window in SYN & SYN/ACK segments is never
    	 * scaled. So correct it appropriately.
    	 */
    	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
    
    	/* Activate the retrans timer so that SYNACK can be retransmitted.
    	 * The request socket is not added to the SYN table of the parent
    	 * because it's been added to the accept queue directly.
    	 */
    	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
    	    TCP_TIMEOUT_INIT, TCP_RTO_MAX);
    
    	/* Add the child socket directly into the accept queue */
    	inet_csk_reqsk_queue_add(sk, req, child);
    
    	/* Now finish processing the fastopen child socket. */
    	inet_csk(child)->icsk_af_ops->rebuild_header(child);
    	tcp_init_congestion_control(child);
    	tcp_mtup_init(child);
    	tcp_init_buffer_space(child);
    	tcp_init_metrics(child);
    
    	/* Queue the data carried in the SYN packet. We need to first
    	 * bump skb's refcnt because the caller will attempt to free it.
    	 *
    	 * XXX (TFO) - we honor a zero-payload TFO request for now.
    	 * (Any reason not to?)
    	 */
    	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
    		/* Don't queue the skb if there is no payload in SYN.
    		 * XXX (TFO) - How about SYN+FIN?
    		 */
    		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
    	} else {
    		skb = skb_get(skb);
    		skb_dst_drop(skb);
    		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
    		skb_set_owner_r(skb, child);
    		__skb_queue_tail(&child->sk_receive_queue, skb);
    		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
    
    		tp->syn_data_acked = 1;
    
    	}
    	sk->sk_data_ready(sk, 0);
    	bh_unlock_sock(child);
    	sock_put(child);
    	WARN_ON(req->sk == NULL);
    	return 0;
    }
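
/* Usage sketch (editor's addition): the client side of the path above sends
 * its request data in the SYN with MSG_FASTOPEN, which performs an implicit
 * connect():
 *
 *	sendto(fd, buf, len, MSG_FASTOPEN,
 *	       (struct sockaddr *)&peer, sizeof(peer));
 *
 * On first contact the kernel emits a bare SYN and collects a cookie (the
 * foc->len == 0 branch of tcp_fastopen_check()); with a cached cookie the
 * same call emits SYN+data, which tcp_v4_conn_req_fastopen() then queues
 * directly on the child socket's receive queue.
 */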
    
    
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	const u8 *hash_location;
	struct request_sock *req;
	struct inet_request_sock *ireq;
	struct tcp_sock *tp = tcp_sk(sk);
	struct dst_entry *dst = NULL;
	__be32 saddr = ip_hdr(skb)->saddr;
	__be32 daddr = ip_hdr(skb)->daddr;
	__u32 isn = TCP_SKB_CB(skb)->when;
    
	bool want_cookie = false;
	struct flowi4 fl4;
	struct tcp_fastopen_cookie foc = { .len = -1 };
	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
	struct sk_buff *skb_synack;
	int do_fastopen;
    
	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;
    
    	/* TW buckets are converted to open requests without
    	 * limitations, they conserve resources and peer is
    	 * evidently real one.
    	 */
    
    	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
    
    		want_cookie = tcp_syn_flood_action(sk, skb, "TCP");
    		if (!want_cookie)
    			goto drop;
    
    	}
    
    	/* Accept backlog is full. If we have already queued enough
    	 * of warm entries in syn queue, drop request. It is better than
    	 * clogging syn queue with openreqs with exponentially increasing
    	 * timeout.
    	 */
    
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;
    
    
	req = inet_reqsk_alloc(&tcp_request_sock_ops);
    	if (!req)
    		goto drop;
    
    
    #ifdef CONFIG_TCP_MD5SIG
    	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv4_ops;
    #endif
    
    
	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = TCP_MSS_DEFAULT;
	tmp_opt.user_mss  = tp->rx_opt.user_mss;
    	tcp_parse_options(skb, &tmp_opt, &hash_location, 0,
    	    want_cookie ? NULL : &foc);
    
    
    	if (tmp_opt.cookie_plus > 0 &&
    	    tmp_opt.saw_tstamp &&
    	    !tp->rx_opt.cookie_out_never &&
    	    (sysctl_tcp_cookie_size > 0 ||
    	     (tp->cookie_values != NULL &&
    	      tp->cookie_values->cookie_desired > 0))) {
    		u8 *c;
    		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
    		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;
    
    		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
    			goto drop_and_release;
    
    		/* Secret recipe starts with IP addresses */
    
    		*mess++ ^= (__force u32)daddr;
    		*mess++ ^= (__force u32)saddr;
    
    		/* plus variable length Initiator Cookie */
    		c = (u8 *)mess;
    		while (l-- > 0)
    			*c++ ^= *hash_location++;
    
    
    		want_cookie = false;	/* not our kind of cookie */
    
    		tmp_ext.cookie_out_never = 0; /* false */
    		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
    	} else if (!tp->rx_opt.cookie_in_always) {
    		/* redundant indications, but ensure initialization. */
    		tmp_ext.cookie_out_never = 1; /* true */
    		tmp_ext.cookie_plus = 0;
    	} else {
    		goto drop_and_release;
    	}
    	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;
    
	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);
    
    	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
    	tcp_openreq_init(req, &tmp_opt, skb);
    
    
    	ireq = inet_rsk(req);
    	ireq->loc_addr = daddr;
    	ireq->rmt_addr = saddr;
    	ireq->no_srccheck = inet_sk(sk)->transparent;
    
    	ireq->opt = tcp_v4_save_options(skb);
    
	if (security_inet_conn_request(sk, skb, req))
		goto drop_and_free;

	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, skb);
    
    
    	if (want_cookie) {
    		isn = cookie_v4_init_sequence(sk, skb, &req->mss);
    
		req->cookie_ts = tmp_opt.tstamp_ok;
    	} else if (!isn) {
    		/* VJ's idea. We save last timestamp seen
    		 * from the destination in peer table, when entering
    		 * state TIME-WAIT, and check against it before
    		 * accepting new connection request.
    		 *
    		 * If "isn" is not zero, this request hit alive
    		 * timewait bucket, so that all the necessary checks
    		 * are made in the function processing timewait state.
    		 */
    		if (tmp_opt.saw_tstamp &&
    
    		    tcp_death_row.sysctl_tw_recycle &&
    
    		    (dst = inet_csk_route_req(sk, &fl4, req)) != NULL &&
    
    		    fl4.daddr == saddr) {
			if (!tcp_peer_is_proven(req, dst, true)) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
    			}
    		}
    		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 !tcp_peer_is_proven(req, dst, false)) {
    			/* Without syncookies last quarter of
    			 * backlog is filled with destinations,
    			 * proven to be alive.
    			 * It means that we continue to communicate
    			 * to destinations, already remembered
    			 * to the moment of synflood.
    			 */
    
			LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"),
				       &saddr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v4_init_sequence(skb);
	}
	tcp_rsk(req)->snt_isn = isn;

    	if (dst == NULL) {
    		dst = inet_csk_route_req(sk, &fl4, req);
    		if (dst == NULL)
    			goto drop_and_free;
    	}
    	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
    
    	/* We don't call tcp_v4_send_synack() directly because we need
    	 * to make sure a child socket can be created successfully before
    	 * sending back synack!
    	 *
    	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
    	 * (or better yet, call tcp_send_synack() in the child context
    	 * directly, but will have to fix bunch of other code first)
    	 * after syn_recv_sock() except one will need to first fix the
    	 * latter to remove its dependency on the current implementation
    	 * of tcp_v4_send_synack()->tcp_select_initial_window().
    	 */
    	skb_synack = tcp_make_synack(sk, dst, req,
    	    (struct request_values *)&tmp_ext,
    	    fastopen_cookie_present(&valid_foc) ? &valid_foc : NULL);
    
    	if (skb_synack) {
    		__tcp_v4_send_check(skb_synack, ireq->loc_addr, ireq->rmt_addr);
    		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
    	} else
    		goto drop_and_free;
    
    	if (likely(!do_fastopen)) {
    		int err;
    		err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr,
    		     ireq->rmt_addr, ireq->opt);
    		err = net_xmit_eval(err);
    		if (err || want_cookie)
    			goto drop_and_free;
    
    
		tcp_rsk(req)->snt_synack = tcp_time_stamp;
    		tcp_rsk(req)->listener = NULL;
    		/* Add the request_sock to the SYN table */
    		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
    		if (fastopen_cookie_present(&foc) && foc.len != 0)
    			NET_INC_STATS_BH(sock_net(sk),
    			    LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
    	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req,
	    (struct request_values *)&tmp_ext))
		goto drop_and_free;
    
    	return 0;
    
    
    drop_and_release:
    	dst_release(dst);
    
drop_and_free:
	reqsk_free(req);
drop:
    	return 0;
    }
    
    EXPORT_SYMBOL(tcp_v4_conn_request);
    
    /*
     * The three way handshake has completed - we got a valid synack -
     * now create the new socket.
     */
struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req,
				  struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
    	struct inet_sock *newinet;
    	struct tcp_sock *newtp;
    	struct sock *newsk;
    
    #ifdef CONFIG_TCP_MD5SIG
    	struct tcp_md5sig_key *key;
    #endif
    
    	struct ip_options_rcu *inet_opt;
    
    	if (sk_acceptq_is_full(sk))
    		goto exit_overflow;
    
    	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto exit_nonewsk;

    	newsk->sk_gso_type = SKB_GSO_TCPV4;
    
	inet_sk_rx_dst_set(newsk, skb);

	newtp		      = tcp_sk(newsk);
	newinet		      = inet_sk(newsk);
	ireq		      = inet_rsk(req);
    	newinet->inet_daddr   = ireq->rmt_addr;
    	newinet->inet_rcv_saddr = ireq->loc_addr;
    	newinet->inet_saddr	      = ireq->loc_addr;
    
    	inet_opt	      = ireq->opt;
    	rcu_assign_pointer(newinet->inet_opt, inet_opt);
    
    	newinet->mc_index     = inet_iif(skb);
    
    	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
    
    	newinet->rcv_tos      = ip_hdr(skb)->tos;
    
    	inet_csk(newsk)->icsk_ext_hdr_len = 0;
    
    	if (inet_opt)
    		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
    
    	newinet->inet_id = newtp->write_seq ^ jiffies;
    
    	if (!dst) {
    		dst = inet_csk_route_child_sock(sk, newsk, req);
    		if (!dst)
    			goto put_and_exit;
    	} else {
    		/* syncookie case : see end of cookie_v4_check() */
    	}
    
	tcp_mtup_init(newsk);
    	tcp_sync_mss(newsk, dst_mtu(dst));
    
    	newtp->advmss = dst_metric_advmss(dst);
    
    	if (tcp_sk(sk)->rx_opt.user_mss &&
    	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
    		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
    
    
    	tcp_initialize_rcv_mss(newsk);
    
    	tcp_synack_rtt_meas(newsk, req);
    
    	newtp->total_retrans = req->num_retrans;
    
    #ifdef CONFIG_TCP_MD5SIG
    	/* Copy over the MD5 key from the original socket */
    
    	key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
    				AF_INET);
    
    	if (key != NULL) {
    
    		/*
    		 * We're using one, so create a matching key
    		 * on the newsk structure. If we fail to get
    		 * memory, then we end up not copying the key
    		 * across. Shucks.
    		 */
    
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
			       AF_INET, key->key, key->keylen, GFP_ATOMIC);
		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
	}
#endif

    	if (__inet_inherit_port(sk, newsk) < 0)
    		goto put_and_exit;
    
	__inet_hash_nolisten(newsk, NULL);
    
    	return newsk;
    
exit_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
exit_nonewsk:
	dst_release(dst);
exit:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
put_and_exit:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
	goto exit;
}
    
    EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
    
    
    static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
    {
    
	struct tcphdr *th = tcp_hdr(skb);
	const struct iphdr *iph = ip_hdr(skb);
    	struct sock *nsk;
    
	struct request_sock **prev;

    	/* Find possible connection requests. */
    
    	struct request_sock *req = inet_csk_search_req(sk, &prev, th->source,
    						       iph->saddr, iph->daddr);
    
	if (req)
		return tcp_check_req(sk, skb, req, prev, false);
    
    
	nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
			th->source, iph->daddr, th->dest, inet_iif(skb));

    	if (nsk) {
    		if (nsk->sk_state != TCP_TIME_WAIT) {
    			bh_lock_sock(nsk);
    			return nsk;
    		}
    
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
    	}
    
    #ifdef CONFIG_SYN_COOKIES
    
	if (!th->syn)
		sk = cookie_v4_check(sk, skb, &(IPCB(skb)->opt));
    #endif
    	return sk;
    }
    
    
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
    
    	const struct iphdr *iph = ip_hdr(skb);
    
    
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v4_check(skb->len, iph->saddr,
				  iph->daddr, skb->csum)) {
    
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}
    
	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
				       skb->len, IPPROTO_TCP, 0);

    	if (skb->len <= 76) {
    
		return __skb_checksum_complete(skb);
    	}
    	return 0;
    }
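
/* Worked sketch (editor's addition): tcp_v4_check() above verifies the
 * standard RFC 1071 one's-complement sum over pseudo-header + segment.
 * A minimal userspace equivalent, assuming the 12-byte pseudo-header is
 * prepended to the buffer:
 *
 *	static uint16_t csum16(const uint8_t *p, size_t len)
 *	{
 *		uint32_t sum = 0;
 *
 *		for (; len > 1; p += 2, len -= 2)
 *			sum += (p[0] << 8) | p[1];	// 16-bit big-endian words
 *		if (len)
 *			sum += p[0] << 8;		// odd trailing byte, zero padded
 *		while (sum >> 16)
 *			sum = (sum & 0xffff) + (sum >> 16);	// fold carries
 *		return ~sum & 0xffff;
 *	}
 *
 * Over a valid segment (checksum field included) the folded sum is 0xffff,
 * so csum16() returns 0, which is the !tcp_v4_check(...) test above.
 */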
    
    
/* The socket must have its spinlock held when we get
     * here.
     *
     * We have a potential double-lock case here, so even when
     * doing backlog processing we use the BH locking scheme.
     * This is because we cannot sleep with the original spinlock
     * held.
     */
    int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
    
    	struct sock *rsk;
    #ifdef CONFIG_TCP_MD5SIG
    	/*
    	 * We really want to reject the packet as early as possible
    	 * if:
    	 *  o We're expecting an MD5'd packet and this is no MD5 tcp option
    	 *  o There is an MD5 option and we're not expecting one
    	 */
    
	if (tcp_v4_inbound_md5_hash(sk, skb))
		goto discard;
#endif

    	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
    
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		if (dst) {
    			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
    			    dst->ops->check(dst, 0) == NULL) {
    
    				dst_release(dst);
    				sk->sk_rx_dst = NULL;
    			}
    		}
    
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
			rsk = sk;
			goto reset;
		}
    		return 0;
    	}
    
    
	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;
    
    	if (sk->sk_state == TCP_LISTEN) {
    		struct sock *nsk = tcp_v4_hnd_req(sk, skb);
    		if (!nsk)
    			goto discard;
    
    		if (nsk != sk) {
    
    			sock_rps_save_rxhash(nsk, skb);
    
    			if (tcp_child_process(sk, nsk, skb)) {
    				rsk = nsk;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
				goto reset;
			}
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
		rsk = sk;
		goto reset;
	}
    	return 0;
    
    reset:
    
    	tcp_v4_send_reset(rsk, skb);
    
    discard:
    	kfree_skb(skb);
    	/* Be careful here. If this function gets more complicated and
    	 * gcc suffers from register pressure on the x86, sk (in %ebx)
    	 * might be destroyed here. This current version compiles correctly,
    	 * but you have been warned.
    	 */
    	return 0;
    
    csum_err:
    
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
    	goto discard;
    }
    
    EXPORT_SYMBOL(tcp_v4_do_rcv);
    
void tcp_v4_early_demux(struct sk_buff *skb)
{
    	const struct iphdr *iph;
    	const struct tcphdr *th;
    	struct sock *sk;
    
	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	iph = ip_hdr(skb);
	th = tcp_hdr(skb);
    
    
	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
				       iph->saddr, th->source,
				       iph->daddr, ntohs(th->dest),
				       skb->skb_iif);
    
    	if (sk) {
    		skb->sk = sk;
    		skb->destructor = sock_edemux;
    		if (sk->sk_state != TCP_TIME_WAIT) {
    			struct dst_entry *dst = sk->sk_rx_dst;
    
    			if (dst)
    				dst = dst_check(dst, 0);
    
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

    /*
     *	From tcp_input.c
     */
    
    int tcp_v4_rcv(struct sk_buff *skb)
    {
    
	const struct iphdr *iph;
	const struct tcphdr *th;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);
    
    	if (skb->pkt_type != PACKET_HOST)
    		goto discard_it;
    
    	/* Count it even if it's bad */
    
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);
    
    	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
    		goto discard_it;
    
    
	th = tcp_hdr(skb);

    	if (th->doff < sizeof(struct tcphdr) / 4)
    		goto bad_packet;
    	if (!pskb_may_pull(skb, th->doff * 4))
    		goto discard_it;
    
    	/* An explanation is required here, I think.
	 * Packet length and doff are validated by header prediction,
	 * provided case of th->doff==0 is eliminated.
    	 * So, we defer the checks. */
    
	if (!skb_csum_unnecessary(skb) && tcp_v4_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	iph = ip_hdr(skb);
    	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
    	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
    				    skb->len - th->doff * 4);
    	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
    	TCP_SKB_CB(skb)->when	 = 0;
    
	TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
    	TCP_SKB_CB(skb)->sacked	 = 0;
    
    
    	sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
    
    	if (!sk)
    		goto no_tcp_socket;
    
    
    process:
    	if (sk->sk_state == TCP_TIME_WAIT)
    		goto do_time_wait;