Skip to content
Snippets Groups Projects
af_inet.c 39.6 KiB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
		}

		answer = NULL;
	}
	if (answer)
		goto out_permanent;

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry.  This means that when we remove this entry, the
Linus Torvalds's avatar
Linus Torvalds committed
	 * system automatically returns to the old behavior.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	return;

out_permanent:
	printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
	       protocol);
	goto out;

out_illegal:
	printk(KERN_ERR
	       "Ignoring attempt to register invalid socket type %d.\n",
	       p->type);
	goto out;
}
Eric Dumazet's avatar
Eric Dumazet committed
EXPORT_SYMBOL(inet_register_protosw);
Linus Torvalds's avatar
Linus Torvalds committed

void inet_unregister_protosw(struct inet_protosw *p)
{
	if (INET_PROTOSW_PERMANENT & p->flags) {
		printk(KERN_ERR
		       "Attempt to unregister permanent protocol %d.\n",
		       p->protocol);
	} else {
		spin_lock_bh(&inetsw_lock);
		list_del_rcu(&p->list);
		spin_unlock_bh(&inetsw_lock);

		synchronize_net();
	}
}
Eric Dumazet's avatar
Eric Dumazet committed
EXPORT_SYMBOL(inet_unregister_protosw);
Linus Torvalds's avatar
Linus Torvalds committed

/*
 *      Shall we try to damage output packets if routing dev changes?
 */

int sysctl_ip_dynaddr __read_mostly;

static int inet_sk_reselect_saddr(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	int err;
	struct rtable *rt;
	__be32 old_saddr = inet->inet_saddr;
	__be32 new_saddr;
	__be32 daddr = inet->inet_daddr;

	if (inet->opt && inet->opt->srr)
		daddr = inet->opt->faddr;

	/* Query new route. */
	err = ip_route_connect(&rt, daddr, 0,
			       RT_CONN_FLAGS(sk),
			       sk->sk_bound_dev_if,
			       sk->sk_protocol,
			       inet->inet_sport, inet->inet_dport, sk, 0);
	if (err)
		return err;

	sk_setup_caps(sk, &rt->u.dst);

	new_saddr = rt->rt_src;

	if (new_saddr == old_saddr)
		return 0;

	if (sysctl_ip_dynaddr > 1) {
		printk(KERN_INFO "%s(): shifting inet->saddr from %pI4 to %pI4\n",
		       __func__, &old_saddr, &new_saddr);
	inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;

	/*
	 * XXX The only one ugly spot where we need to
	 * XXX really change the sockets identity after
	 * XXX it has entered the hashes. -DaveM
	 *
	 * Besides that, it does not check for connection
	 * uniqueness. Wait for troubles.
	 */
	__sk_prot_rehash(sk);
	return 0;
}

int inet_sk_rebuild_header(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
	__be32 daddr;
	int err;

	/* Route is OK, nothing to do. */
	if (rt)
		return 0;

	/* Reroute. */
	daddr = inet->inet_daddr;
	if (inet->opt && inet->opt->srr)
		daddr = inet->opt->faddr;
{
	struct flowi fl = {
		.oif = sk->sk_bound_dev_if,
		.mark = sk->sk_mark,
		.nl_u = {
			.ip4_u = {
				.daddr	= daddr,
				.saddr	= inet->inet_saddr,
				.tos	= RT_CONN_FLAGS(sk),
			},
		},
		.proto = sk->sk_protocol,
				.sport = inet->inet_sport,
				.dport = inet->inet_dport,
	security_sk_classify_flow(sk, &fl);
	err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0);
}
	if (!err)
		sk_setup_caps(sk, &rt->u.dst);
	else {
		/* Routing failed... */
		sk->sk_route_caps = 0;
		/*
		 * Other protocols have to map its equivalent state to TCP_SYN_SENT.
		 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
		 */
		if (!sysctl_ip_dynaddr ||
		    sk->sk_state != TCP_SYN_SENT ||
		    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
		    (err = inet_sk_reselect_saddr(sk)) != 0)
			sk->sk_err_soft = -err;
	}

	return err;
}
EXPORT_SYMBOL(inet_sk_rebuild_header);

static int inet_gso_send_check(struct sk_buff *skb)
{
	struct iphdr *iph;
	const struct net_protocol *ops;
	int proto;
	int ihl;
	int err = -EINVAL;

	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
		goto out;

	ihl = iph->ihl * 4;
	if (ihl < sizeof(*iph))
		goto out;

	if (unlikely(!pskb_may_pull(skb, ihl)))
		goto out;

	__skb_pull(skb, ihl);
	skb_reset_transport_header(skb);
	proto = iph->protocol & (MAX_INET_PROTOS - 1);
	err = -EPROTONOSUPPORT;

	rcu_read_lock();
	ops = rcu_dereference(inet_protos[proto]);
	if (likely(ops && ops->gso_send_check))
		err = ops->gso_send_check(skb);
	rcu_read_unlock();

out:
	return err;
}

static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
Herbert Xu's avatar
Herbert Xu committed
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	struct iphdr *iph;
	const struct net_protocol *ops;
Herbert Xu's avatar
Herbert Xu committed
	int proto;
	int ihl;
	int id;
	if (!(features & NETIF_F_V4_CSUM))
		features &= ~NETIF_F_SG;

	if (unlikely(skb_shinfo(skb)->gso_type &
		     ~(SKB_GSO_TCPV4 |
		       SKB_GSO_UDP |
		       SKB_GSO_DODGY |
		       SKB_GSO_TCP_ECN |
		       0)))
		goto out;

	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
Herbert Xu's avatar
Herbert Xu committed
		goto out;

Herbert Xu's avatar
Herbert Xu committed
	ihl = iph->ihl * 4;
	if (ihl < sizeof(*iph))
		goto out;

	if (unlikely(!pskb_may_pull(skb, ihl)))
Herbert Xu's avatar
Herbert Xu committed
		goto out;

	__skb_pull(skb, ihl);
	skb_reset_transport_header(skb);
Herbert Xu's avatar
Herbert Xu committed
	id = ntohs(iph->id);
	proto = iph->protocol & (MAX_INET_PROTOS - 1);
	segs = ERR_PTR(-EPROTONOSUPPORT);

	rcu_read_lock();
	ops = rcu_dereference(inet_protos[proto]);
	if (likely(ops && ops->gso_segment))
		segs = ops->gso_segment(skb, features);
Herbert Xu's avatar
Herbert Xu committed
	rcu_read_unlock();

	if (!segs || IS_ERR(segs))
Herbert Xu's avatar
Herbert Xu committed
		goto out;

	skb = segs;
	do {
		if (proto == IPPROTO_UDP) {
			iph->id = htons(id);
			iph->frag_off = htons(offset >> 3);
			if (skb->next != NULL)
				iph->frag_off |= htons(IP_MF);
			offset += (skb->len - skb->mac_len - iph->ihl * 4);
		} else
			iph->id = htons(id++);
Herbert Xu's avatar
Herbert Xu committed
		iph->tot_len = htons(skb->len - skb->mac_len);
		iph->check = 0;
		iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl);
Herbert Xu's avatar
Herbert Xu committed
	} while ((skb = skb->next));

out:
	return segs;
}

Herbert Xu's avatar
Herbert Xu committed
static struct sk_buff **inet_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	const struct net_protocol *ops;
Herbert Xu's avatar
Herbert Xu committed
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	struct iphdr *iph;
	unsigned int hlen;
	unsigned int off;
Herbert Xu's avatar
Herbert Xu committed
	int flush = 1;
	int proto;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		iph = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!iph))
			goto out;
	}
Herbert Xu's avatar
Herbert Xu committed

	proto = iph->protocol & (MAX_INET_PROTOS - 1);

	rcu_read_lock();
	ops = rcu_dereference(inet_protos[proto]);
	if (!ops || !ops->gro_receive)
		goto out_unlock;

	if (*(u8 *)iph != 0x45)
Herbert Xu's avatar
Herbert Xu committed
		goto out_unlock;

	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
		goto out_unlock;

	id = ntohl(*(u32 *)&iph->id);
	flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
	id >>= 16;
Herbert Xu's avatar
Herbert Xu committed

	for (p = *head; p; p = p->next) {
		struct iphdr *iph2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		iph2 = ip_hdr(p);

		if ((iph->protocol ^ iph2->protocol) |
		    (iph->tos ^ iph2->tos) |
		    (iph->saddr ^ iph2->saddr) |
		    (iph->daddr ^ iph2->daddr)) {
Herbert Xu's avatar
Herbert Xu committed
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* All fields must match except length and checksum. */
		NAPI_GRO_CB(p)->flush |=
			(iph->ttl ^ iph2->ttl) |
			((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
Herbert Xu's avatar
Herbert Xu committed

		NAPI_GRO_CB(p)->flush |= flush;
	}

	NAPI_GRO_CB(skb)->flush |= flush;
	skb_gro_pull(skb, sizeof(*iph));
	skb_set_transport_header(skb, skb_gro_offset(skb));
Herbert Xu's avatar
Herbert Xu committed

	pp = ops->gro_receive(head, skb);

out_unlock:
	rcu_read_unlock();

out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}

static int inet_gro_complete(struct sk_buff *skb)
{
	const struct net_protocol *ops;
Herbert Xu's avatar
Herbert Xu committed
	struct iphdr *iph = ip_hdr(skb);
	int proto = iph->protocol & (MAX_INET_PROTOS - 1);
	int err = -ENOSYS;
	__be16 newlen = htons(skb->len - skb_network_offset(skb));

	csum_replace2(&iph->check, iph->tot_len, newlen);
	iph->tot_len = newlen;

	rcu_read_lock();
	ops = rcu_dereference(inet_protos[proto]);
	if (WARN_ON(!ops || !ops->gro_complete))
		goto out_unlock;

	err = ops->gro_complete(skb);

out_unlock:
	rcu_read_unlock();

	return err;
}

int inet_ctl_sock_create(struct sock **sk, unsigned short family,
			 unsigned short type, unsigned char protocol,
			 struct net *net)
	struct socket *sock;
	int rc = sock_create_kern(family, type, protocol, &sock);
		*sk = sock->sk;
		(*sk)->sk_allocation = GFP_ATOMIC;
		/*
		 * Unhash it so that IP input processing does not even see it,
		 * we do not wish this socket to see incoming packets.
		 */
	}
	return rc;
}
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);

unsigned long snmp_fold_field(void *mib[], int offt)
{
	unsigned long res = 0;
	int i;

	for_each_possible_cpu(i) {
		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
	}
	return res;
}
EXPORT_SYMBOL_GPL(snmp_fold_field);

int snmp_mib_init(void *ptr[2], size_t mibsize)
{
	BUG_ON(ptr == NULL);
	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
	if (!ptr[0])
		goto err0;
	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
	if (!ptr[1])
		goto err1;
	return 0;
err1:
	free_percpu(ptr[0]);
	ptr[0] = NULL;
err0:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(snmp_mib_init);

void snmp_mib_free(void *ptr[2])
{
	BUG_ON(ptr == NULL);
	free_percpu(ptr[0]);
	free_percpu(ptr[1]);
	ptr[0] = ptr[1] = NULL;
}
EXPORT_SYMBOL_GPL(snmp_mib_free);

Linus Torvalds's avatar
Linus Torvalds committed
#ifdef CONFIG_IP_MULTICAST
static const struct net_protocol igmp_protocol = {
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	igmp_rcv,
Linus Torvalds's avatar
Linus Torvalds committed
};
#endif

static const struct net_protocol tcp_protocol = {
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	tcp_v4_rcv,
	.err_handler =	tcp_v4_err,
	.gso_send_check = tcp_v4_gso_send_check,
Herbert Xu's avatar
Herbert Xu committed
	.gso_segment =	tcp_tso_segment,
Herbert Xu's avatar
Herbert Xu committed
	.gro_receive =	tcp4_gro_receive,
	.gro_complete =	tcp4_gro_complete,
Linus Torvalds's avatar
Linus Torvalds committed
	.no_policy =	1,
static const struct net_protocol udp_protocol = {
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	udp_rcv,
	.err_handler =	udp_err,
	.gso_send_check = udp4_ufo_send_check,
	.gso_segment = udp4_ufo_fragment,
Linus Torvalds's avatar
Linus Torvalds committed
	.no_policy =	1,
static const struct net_protocol icmp_protocol = {
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	icmp_rcv,
	.no_policy =	1,
static __net_init int ipv4_mib_init_net(struct net *net)
{
	if (snmp_mib_init((void **)net->mib.tcp_statistics,
			  sizeof(struct tcp_mib)) < 0)
		goto err_tcp_mib;
	if (snmp_mib_init((void **)net->mib.ip_statistics,
			  sizeof(struct ipstats_mib)) < 0)
		goto err_ip_mib;
	if (snmp_mib_init((void **)net->mib.net_statistics,
			  sizeof(struct linux_mib)) < 0)
		goto err_net_mib;
	if (snmp_mib_init((void **)net->mib.udp_statistics,
			  sizeof(struct udp_mib)) < 0)
		goto err_udp_mib;
	if (snmp_mib_init((void **)net->mib.udplite_statistics,
			  sizeof(struct udp_mib)) < 0)
		goto err_udplite_mib;
	if (snmp_mib_init((void **)net->mib.icmp_statistics,
			  sizeof(struct icmp_mib)) < 0)
		goto err_icmp_mib;
	if (snmp_mib_init((void **)net->mib.icmpmsg_statistics,
			  sizeof(struct icmpmsg_mib)) < 0)
		goto err_icmpmsg_mib;

	tcp_mib_init(net);
	return 0;
err_icmpmsg_mib:
	snmp_mib_free((void **)net->mib.icmp_statistics);
err_icmp_mib:
	snmp_mib_free((void **)net->mib.udplite_statistics);
err_udplite_mib:
	snmp_mib_free((void **)net->mib.udp_statistics);
err_udp_mib:
	snmp_mib_free((void **)net->mib.net_statistics);
err_net_mib:
	snmp_mib_free((void **)net->mib.ip_statistics);
err_ip_mib:
	snmp_mib_free((void **)net->mib.tcp_statistics);
err_tcp_mib:
	return -ENOMEM;
}

static __net_exit void ipv4_mib_exit_net(struct net *net)
{
	snmp_mib_free((void **)net->mib.icmpmsg_statistics);
	snmp_mib_free((void **)net->mib.icmp_statistics);
	snmp_mib_free((void **)net->mib.udplite_statistics);
	snmp_mib_free((void **)net->mib.udp_statistics);
	snmp_mib_free((void **)net->mib.net_statistics);
	snmp_mib_free((void **)net->mib.ip_statistics);
	snmp_mib_free((void **)net->mib.tcp_statistics);
}

static __net_initdata struct pernet_operations ipv4_mib_ops = {
	.init = ipv4_mib_init_net,
	.exit = ipv4_mib_exit_net,
};

Linus Torvalds's avatar
Linus Torvalds committed
static int __init init_ipv4_mibs(void)
{
	return register_pernet_subsys(&ipv4_mib_ops);
Linus Torvalds's avatar
Linus Torvalds committed
}

static int ipv4_proc_init(void);

static struct packet_type ip_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.gso_send_check = inet_gso_send_check,
Herbert Xu's avatar
Herbert Xu committed
	.gso_segment = inet_gso_segment,
Herbert Xu's avatar
Herbert Xu committed
	.gro_receive = inet_gro_receive,
	.gro_complete = inet_gro_complete,
Linus Torvalds's avatar
Linus Torvalds committed
static int __init inet_init(void)
{
	struct sk_buff *dummy_skb;
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
Linus Torvalds's avatar
Linus Torvalds committed

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/*
	 *	Tell SOCKET that we are alive...
	(void)sock_register(&inet_family_ops);
Linus Torvalds's avatar
Linus Torvalds committed

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

Linus Torvalds's avatar
Linus Torvalds committed
	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */
Linus Torvalds's avatar
Linus Torvalds committed

	ip_init();

Linus Torvalds's avatar
Linus Torvalds committed

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();
Linus Torvalds's avatar
Linus Torvalds committed

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");
Linus Torvalds's avatar
Linus Torvalds committed

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
Linus Torvalds's avatar
Linus Torvalds committed
#endif
	/*
	 *	Initialise per-cpu ipv4 mibs
Linus Torvalds's avatar
Linus Torvalds committed

Stephen Hemminger's avatar
Stephen Hemminger committed
	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");
Linus Torvalds's avatar
Linus Torvalds committed
	ipv4_proc_init();

	ipfrag_init();

Linus Torvalds's avatar
Linus Torvalds committed
	rc = 0;
out:
	return rc;
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
Linus Torvalds's avatar
Linus Torvalds committed
	goto out;
}

fs_initcall(inet_init);
Linus Torvalds's avatar
Linus Torvalds committed

/* ------------------------------------------------------------------------ */

#ifdef CONFIG_PROC_FS
static int __init ipv4_proc_init(void)
{
	int rc = 0;

	if (raw_proc_init())
		goto out_raw;
	if (tcp4_proc_init())
		goto out_tcp;
	if (udp4_proc_init())
		goto out_udp;
	if (ip_misc_proc_init())
		goto out_misc;
out:
	return rc;
out_misc:
	udp4_proc_exit();
out_udp:
	tcp4_proc_exit();
out_tcp:
	raw_proc_exit();
out_raw:
	rc = -ENOMEM;
	goto out;
}

#else /* CONFIG_PROC_FS */
static int __init ipv4_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

MODULE_ALIAS_NETPROTO(PF_INET);