Skip to content
Snippets Groups Projects
af_inet.c 45.7 KiB
Newer Older
		.type =       SOCK_STREAM,
		.protocol =   IPPROTO_TCP,
		.prot =       &tcp_prot,
		.ops =        &inet_stream_ops,
		.no_check =   0,
		.flags =      INET_PROTOSW_PERMANENT |
	},

	{
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_UDP,
		.prot =       &udp_prot,
		.ops =        &inet_dgram_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_PERMANENT,
Linus Torvalds's avatar
Linus Torvalds committed
       },
       {
		.type =       SOCK_DGRAM,
		.protocol =   IPPROTO_ICMP,
		.prot =       &ping_prot,
		.ops =        &inet_dgram_ops,
		.no_check =   UDP_CSUM_DEFAULT,
		.flags =      INET_PROTOSW_REUSE,
       },
	       .type =       SOCK_RAW,
	       .protocol =   IPPROTO_IP,	/* wild card */
	       .prot =       &raw_prot,
	       .ops =        &inet_sockraw_ops,
	       .no_check =   UDP_CSUM_DEFAULT,
	       .flags =      INET_PROTOSW_REUSE,
/* Number of entries in inetsw_array. */
#define INETSW_ARRAY_LEN ARRAY_SIZE(inetsw_array)
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * inet_register_protosw - link a protocol switch entry into inetsw
 * @p: entry to add; @p->type selects the list, @p->protocol is checked
 *     against the permanent entries already on that list
 *
 * Runs under inetsw_lock.  The request is logged and dropped when
 * @p->type is not below SOCK_MAX, or when a permanent entry with the same
 * protocol number is already registered for that socket type.
 */
void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	if (p->type >= SOCK_MAX)
		goto out_illegal;

	/* If we are trying to override a permanent protocol, bail. */
	answer = NULL;
	last_perm = &inetsw[p->type];
	list_for_each(lh, &inetsw[p->type]) {
		answer = list_entry(lh, struct inet_protosw, list);

		/* Check only the non-wild match. */
		if (INET_PROTOSW_PERMANENT & answer->flags) {
			if (protocol == answer->protocol)
				break;
			last_perm = lh;
		}

		/* Not a clash: forget it so a full scan leaves answer NULL. */
		answer = NULL;
	}
	if (answer)
		goto out_permanent;

	/* Add the new entry after the last permanent entry if any, so that
	 * the new entry does not override a permanent entry when matched with
	 * a wild-card protocol. But it is allowed to override any existing
	 * non-permanent entry.  This means that when we remove this entry, the
	 * system automatically returns to the old behavior.
	 */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	return;

out_permanent:
	pr_err("Attempt to override permanent protocol %d\n", protocol);
	goto out;

out_illegal:
	pr_err("Ignoring attempt to register invalid socket type %d\n",
	       p->type);
	goto out;
}
EXPORT_SYMBOL(inet_register_protosw);
Linus Torvalds's avatar
Linus Torvalds committed

/**
 * inet_unregister_protosw - remove a protocol switch entry
 * @p: entry previously linked with inet_register_protosw()
 *
 * Entries flagged INET_PROTOSW_PERMANENT are never removed; the attempt is
 * only logged.  Any other entry is unlinked under inetsw_lock, and
 * synchronize_net() is called before returning.
 */
void inet_unregister_protosw(struct inet_protosw *p)
{
	if (INET_PROTOSW_PERMANENT & p->flags) {
		pr_err("Attempt to unregister permanent protocol %d\n",
		       p->protocol);
	} else {
		spin_lock_bh(&inetsw_lock);
		list_del_rcu(&p->list);
		spin_unlock_bh(&inetsw_lock);

		synchronize_net();
	}
}
EXPORT_SYMBOL(inet_unregister_protosw);
Linus Torvalds's avatar
Linus Torvalds committed

/*
 *      Shall we try to damage output packets if routing dev changes?
 *
 *      Any non-zero value lets inet_sk_rebuild_header() call
 *      inet_sk_reselect_saddr() for a socket in TCP_SYN_SENT that has no
 *      SOCK_BINDADDR_LOCK; values above 1 additionally make
 *      inet_sk_reselect_saddr() log the address change.
 */

int sysctl_ip_dynaddr __read_mostly;

static int inet_sk_reselect_saddr(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	__be32 old_saddr = inet->inet_saddr;
	__be32 daddr = inet->inet_daddr;
	struct rtable *rt;
	__be32 new_saddr;
	struct ip_options_rcu *inet_opt;
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, daddr, 0, RT_CONN_FLAGS(sk),
			      sk->sk_bound_dev_if, sk->sk_protocol,
			      inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt))
		return PTR_ERR(rt);
	sk_setup_caps(sk, &rt->dst);

	if (new_saddr == old_saddr)
		return 0;

	if (sysctl_ip_dynaddr > 1) {
		pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
			__func__, &old_saddr, &new_saddr);
	inet->inet_saddr = inet->inet_rcv_saddr = new_saddr;

	/*
	 * XXX The only one ugly spot where we need to
	 * XXX really change the sockets identity after
	 * XXX it has entered the hashes. -DaveM
	 *
	 * Besides that, it does not check for connection
	 * uniqueness. Wait for troubles.
	 */
	__sk_prot_rehash(sk);
	return 0;
}

/**
 * inet_sk_rebuild_header - make sure @sk has a usable cached route
 * @sk: socket to check
 *
 * Returns 0 when the cached route is still valid or a new one was found.
 * When the lookup fails, the route capabilities are cleared and, if
 * sysctl_ip_dynaddr allows it, a source-address reselection is tried; a
 * remaining error is stored (negated) in sk_err_soft and returned.
 */
int inet_sk_rebuild_header(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0);
	__be32 daddr;
	struct ip_options_rcu *inet_opt;
	struct flowi4 *fl4;
	int err;

	/* Route is OK, nothing to do. */
	if (rt)
		return 0;

	/* Reroute. */
	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	daddr = inet->inet_daddr;
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	rcu_read_unlock();
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr,
				   inet->inet_dport, inet->inet_sport,
				   sk->sk_protocol, RT_CONN_FLAGS(sk),
				   sk->sk_bound_dev_if);
	if (!IS_ERR(rt)) {
		err = 0;
		sk_setup_caps(sk, &rt->dst);
	} else {
		err = PTR_ERR(rt);

		/* Routing failed... */
		sk->sk_route_caps = 0;
		/*
		 * Other protocols have to map its equivalent state to TCP_SYN_SENT.
		 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
		 */
		if (!sysctl_ip_dynaddr ||
		    sk->sk_state != TCP_SYN_SENT ||
		    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
		    (err = inet_sk_reselect_saddr(sk)) != 0)
			sk->sk_err_soft = -err;
	}

	return err;
}
EXPORT_SYMBOL(inet_sk_rebuild_header);

static int inet_gso_send_check(struct sk_buff *skb)
{
	const struct net_offload *ops;
	const struct iphdr *iph;
	int proto;
	int ihl;
	int err = -EINVAL;

	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
		goto out;

	ihl = iph->ihl * 4;
	if (ihl < sizeof(*iph))
		goto out;

	proto = iph->protocol;

	/* Warning: after this point, iph might be no longer valid */
	if (unlikely(!pskb_may_pull(skb, ihl)))
		goto out;
	skb_reset_transport_header(skb);
	ops = rcu_dereference(inet_offloads[proto]);
	if (likely(ops && ops->callbacks.gso_send_check))
		err = ops->callbacks.gso_send_check(skb);
/*
 * Segment a GSO skb at the IPv4 layer: strip the IP header, let the
 * transport protocol's gso_segment callback split the payload, then fix up
 * id/frag_off, tot_len and the header checksum of every resulting segment.
 *
 * Returns the segment list, or an ERR_PTR (-EINVAL for a bad header or an
 * unsupported gso_type, -EPROTONOSUPPORT when no callback exists).
 */
static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
					netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EINVAL);
	const struct net_offload *ops;
	unsigned int offset = 0;
	bool udpfrag, encap;
	struct iphdr *iph;
	int proto;
	int nhoff;
	int ihl;
	int id;

	/* NOTE(review): upstream lists SKB_GSO_GRE between SKB_GSO_TCP_ECN
	 * and SKB_GSO_IPIP in this mask -- confirm against the tree.
	 */
	if (unlikely(skb_shinfo(skb)->gso_type &
		     ~(SKB_GSO_TCPV4 |
		       SKB_GSO_UDP |
		       SKB_GSO_DODGY |
		       SKB_GSO_TCP_ECN |
		       SKB_GSO_IPIP |
		       SKB_GSO_SIT |
		       SKB_GSO_TCPV6 |
		       SKB_GSO_UDP_TUNNEL |
		       SKB_GSO_MPLS)))
		goto out;

	skb_reset_network_header(skb);
	nhoff = skb_network_header(skb) - skb_mac_header(skb);
	if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
		goto out;

	iph = ip_hdr(skb);
	ihl = iph->ihl * 4;
	if (ihl < sizeof(*iph))
		goto out;

	id = ntohs(iph->id);
	proto = iph->protocol;

	/* Warning: after this point, iph might be no longer valid */
	if (unlikely(!pskb_may_pull(skb, ihl)))
		goto out;
	__skb_pull(skb, ihl);

	/* A non-zero encap_level means an outer layer already ran. */
	encap = SKB_GSO_CB(skb)->encap_level > 0;
	if (encap)
		features = skb->dev->hw_enc_features & netif_skb_features(skb);
	SKB_GSO_CB(skb)->encap_level += ihl;

	skb_reset_transport_header(skb);

	segs = ERR_PTR(-EPROTONOSUPPORT);

	if (skb->encapsulation &&
	    skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
		udpfrag = proto == IPPROTO_UDP && encap;
	else
		udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;

	ops = rcu_dereference(inet_offloads[proto]);
	if (likely(ops && ops->callbacks.gso_segment))
		segs = ops->callbacks.gso_segment(skb, features);

	if (IS_ERR_OR_NULL(segs))
		goto out;

	skb = segs;
	do {
		iph = (struct iphdr *)(skb_mac_header(skb) + nhoff);
		if (udpfrag) {
			/* IP fragments: shared id, running offset, MF on
			 * every segment except the last.
			 */
			iph->id = htons(id);
			iph->frag_off = htons(offset >> 3);
			if (skb->next != NULL)
				iph->frag_off |= htons(IP_MF);
			offset += skb->len - nhoff - ihl;
		} else {
			/* Separate packets: each one gets the next id. */
			iph->id = htons(id++);
		}
		iph->tot_len = htons(skb->len - nhoff);
		ip_send_check(iph);
		if (encap)
			skb_reset_inner_headers(skb);
		skb->network_header = (u8 *)iph - skb->head;
	} while ((skb = skb->next));

out:
	return segs;
}

Herbert Xu's avatar
Herbert Xu committed
static struct sk_buff **inet_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	const struct net_offload *ops;
Herbert Xu's avatar
Herbert Xu committed
	struct sk_buff **pp = NULL;
	struct sk_buff *p;
	const struct iphdr *iph;
	unsigned int hlen;
	unsigned int off;
Herbert Xu's avatar
Herbert Xu committed
	int flush = 1;
	int proto;

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*iph);
	iph = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		iph = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!iph))
			goto out;
	}
	proto = iph->protocol;
Herbert Xu's avatar
Herbert Xu committed

	rcu_read_lock();
	ops = rcu_dereference(inet_offloads[proto]);
	if (!ops || !ops->callbacks.gro_receive)
Herbert Xu's avatar
Herbert Xu committed
		goto out_unlock;

	if (*(u8 *)iph != 0x45)
Herbert Xu's avatar
Herbert Xu committed
		goto out_unlock;

	if (unlikely(ip_fast_csum((u8 *)iph, 5)))
Herbert Xu's avatar
Herbert Xu committed
		goto out_unlock;

	id = ntohl(*(__be32 *)&iph->id);
	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
Herbert Xu's avatar
Herbert Xu committed

	for (p = *head; p; p = p->next) {
		struct iphdr *iph2;

		if (!NAPI_GRO_CB(p)->same_flow)
			continue;

		iph2 = (struct iphdr *)(p->data + off);
		/* The above works because, with the exception of the top
		 * (inner most) layer, we only aggregate pkts with the same
		 * hdr length so all the hdrs we'll need to verify will start
		 * at the same offset.
		 */
		if ((iph->protocol ^ iph2->protocol) |
		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
Herbert Xu's avatar
Herbert Xu committed
			NAPI_GRO_CB(p)->same_flow = 0;
			continue;
		}

		/* All fields must match except length and checksum. */
		NAPI_GRO_CB(p)->flush |=
			(iph->ttl ^ iph2->ttl) |
			(iph->tos ^ iph2->tos) |
			((iph->frag_off ^ iph2->frag_off) & htons(IP_DF));
		/* Save the IP ID check to be included later when we get to
		 * the transport layer so only the inner most IP ID is checked.
		 * This is because some GSO/TSO implementations do not
		 * correctly increment the IP ID for the outer hdrs.
		 */
		NAPI_GRO_CB(p)->flush_id =
			    ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
Herbert Xu's avatar
Herbert Xu committed
		NAPI_GRO_CB(p)->flush |= flush;
	}

	NAPI_GRO_CB(skb)->flush |= flush;
	skb_set_network_header(skb, off);
	/* The above will be needed by the transport layer if there is one
	 * immediately following this IP hdr.
	 */

	skb_gro_pull(skb, sizeof(*iph));
	skb_set_transport_header(skb, skb_gro_offset(skb));
	pp = ops->callbacks.gro_receive(head, skb);
Herbert Xu's avatar
Herbert Xu committed

out_unlock:
	rcu_read_unlock();

out:
	NAPI_GRO_CB(skb)->flush |= flush;

	return pp;
}

static int inet_gro_complete(struct sk_buff *skb, int nhoff)
	__be16 newlen = htons(skb->len - nhoff);
	struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
	const struct net_offload *ops;
	int proto = iph->protocol;
Herbert Xu's avatar
Herbert Xu committed
	int err = -ENOSYS;

	csum_replace2(&iph->check, iph->tot_len, newlen);
	iph->tot_len = newlen;

	rcu_read_lock();
	ops = rcu_dereference(inet_offloads[proto]);
	if (WARN_ON(!ops || !ops->callbacks.gro_complete))
Herbert Xu's avatar
Herbert Xu committed
		goto out_unlock;

	/* Only need to add sizeof(*iph) to get to the next hdr below
	 * because any hdr with option will have been flushed in
	 * inet_gro_receive().
	 */
	err = ops->callbacks.gro_complete(skb, nhoff + sizeof(*iph));
Herbert Xu's avatar
Herbert Xu committed

out_unlock:
	rcu_read_unlock();

	return err;
}

int inet_ctl_sock_create(struct sock **sk, unsigned short family,
			 unsigned short type, unsigned char protocol,
			 struct net *net)
	struct socket *sock;
	int rc = sock_create_kern(family, type, protocol, &sock);
		*sk = sock->sk;
		(*sk)->sk_allocation = GFP_ATOMIC;
		/*
		 * Unhash it so that IP input processing does not even see it,
		 * we do not wish this socket to see incoming packets.
		 */
	}
	return rc;
}
EXPORT_SYMBOL_GPL(inet_ctl_sock_create);

/*
 * Sum the counter at index @offt over every possible CPU and over all
 * SNMP_ARRAY_SZ per-cpu blocks of @mib.
 */
unsigned long snmp_fold_field(void __percpu *mib[], int offt)
{
	unsigned long total = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		int slot;

		for (slot = 0; slot < SNMP_ARRAY_SZ; slot++) {
			unsigned long *counters;

			counters = (unsigned long *)per_cpu_ptr(mib[slot], cpu);
			total += counters[offt];
		}
	}

	return total;
}
EXPORT_SYMBOL_GPL(snmp_fold_field);

#if BITS_PER_LONG==32

/*
 * 64-bit variant of snmp_fold_field() for 32-bit builds.  Each CPU's
 * counter is read inside a u64_stats fetch/retry loop; the sync object
 * lives @syncp_offset bytes into the per-cpu block.  Only mib[0] is
 * summed.
 */
u64 snmp_fold_field64(void __percpu *mib[], int offt, size_t syncp_offset)
{
	u64 res = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		void *bhptr;
		struct u64_stats_sync *syncp;
		u64 v;
		unsigned int start;

		bhptr = per_cpu_ptr(mib[0], cpu);
		syncp = (struct u64_stats_sync *)(bhptr + syncp_offset);
		/* Re-read until no writer raced with the load. */
		do {
			start = u64_stats_fetch_begin_irq(syncp);
			v = *(((u64 *) bhptr) + offt);
		} while (u64_stats_fetch_retry_irq(syncp, start));

		res += v;
	}
	return res;
}
EXPORT_SYMBOL_GPL(snmp_fold_field64);
#endif

/*
 * Allocate the per-cpu storage for one MIB: ptr[0] always, ptr[1] only
 * when SNMP_ARRAY_SZ is 2.  Returns 0 on success or -ENOMEM; on failure
 * nothing stays allocated and ptr[0] is left NULL.
 */
int snmp_mib_init(void __percpu *ptr[2], size_t mibsize, size_t align)
{
	BUG_ON(!ptr);

	ptr[0] = __alloc_percpu(mibsize, align);
	if (ptr[0] == NULL)
		goto nomem;
#if SNMP_ARRAY_SZ == 2
	ptr[1] = __alloc_percpu(mibsize, align);
	if (ptr[1] == NULL)
		goto free_first;
#endif
	return 0;

#if SNMP_ARRAY_SZ == 2
free_first:
	free_percpu(ptr[0]);
	ptr[0] = NULL;
#endif
nomem:
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(snmp_mib_init);

Linus Torvalds's avatar
Linus Torvalds committed
#ifdef CONFIG_IP_MULTICAST
static const struct net_protocol igmp_protocol = {
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	igmp_rcv,
Linus Torvalds's avatar
Linus Torvalds committed
};
#endif

/* Receive/error handler table entry for TCP (registered by inet_init()). */
static const struct net_protocol tcp_protocol = {
	.early_demux	=	tcp_v4_early_demux,
	.handler	=	tcp_v4_rcv,
	.err_handler	=	tcp_v4_err,
	.no_policy	=	1,
	.netns_ok	=	1,
	.icmp_strict_tag_validation = 1,
};

static const struct net_protocol udp_protocol = {
	.early_demux =	udp_v4_early_demux,
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	udp_rcv,
	.err_handler =	udp_err,
	.no_policy =	1,
static const struct net_protocol icmp_protocol = {
Linus Torvalds's avatar
Linus Torvalds committed
	.handler =	icmp_rcv,
	.err_handler =	icmp_err,
	.no_policy =	1,
/*
 * Per-namespace setup of the IPv4 statistics: allocates the tcp, ip, net,
 * udp, udplite and icmp per-cpu MIBs plus the icmpmsg MIB, and initialises
 * the u64 sync object of every per-cpu ip MIB.
 *
 * Returns 0 on success.  On any allocation failure everything allocated so
 * far is released in reverse order and -ENOMEM is returned.
 */
static __net_init int ipv4_mib_init_net(struct net *net)
{
	int i;

	if (snmp_mib_init((void __percpu **)net->mib.tcp_statistics,
			  sizeof(struct tcp_mib),
			  __alignof__(struct tcp_mib)) < 0)
		goto err_tcp_mib;
	if (snmp_mib_init((void __percpu **)net->mib.ip_statistics,
			  sizeof(struct ipstats_mib),
			  __alignof__(struct ipstats_mib)) < 0)
		goto err_ip_mib;

	for_each_possible_cpu(i) {
		struct ipstats_mib *af_inet_stats;
		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[0], i);
		u64_stats_init(&af_inet_stats->syncp);
#if SNMP_ARRAY_SZ == 2
		af_inet_stats = per_cpu_ptr(net->mib.ip_statistics[1], i);
		u64_stats_init(&af_inet_stats->syncp);
#endif
	}

	if (snmp_mib_init((void __percpu **)net->mib.net_statistics,
			  sizeof(struct linux_mib),
			  __alignof__(struct linux_mib)) < 0)
		goto err_net_mib;
	if (snmp_mib_init((void __percpu **)net->mib.udp_statistics,
			  sizeof(struct udp_mib),
			  __alignof__(struct udp_mib)) < 0)
		goto err_udp_mib;
	if (snmp_mib_init((void __percpu **)net->mib.udplite_statistics,
			  sizeof(struct udp_mib),
			  __alignof__(struct udp_mib)) < 0)
		goto err_udplite_mib;
	if (snmp_mib_init((void __percpu **)net->mib.icmp_statistics,
			  sizeof(struct icmp_mib),
			  __alignof__(struct icmp_mib)) < 0)
		goto err_icmp_mib;
	net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
					      GFP_KERNEL);
	if (!net->mib.icmpmsg_statistics)
		goto err_icmpmsg_mib;

	tcp_mib_init(net);
	return 0;

	/* Each label frees what was allocated before the failing step. */
err_icmpmsg_mib:
	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
err_icmp_mib:
	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
err_udplite_mib:
	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
err_udp_mib:
	snmp_mib_free((void __percpu **)net->mib.net_statistics);
err_net_mib:
	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
err_ip_mib:
	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
err_tcp_mib:
	return -ENOMEM;
}

static __net_exit void ipv4_mib_exit_net(struct net *net)
{
	kfree(net->mib.icmpmsg_statistics);
	snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
	snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
	snmp_mib_free((void __percpu **)net->mib.udp_statistics);
	snmp_mib_free((void __percpu **)net->mib.net_statistics);
	snmp_mib_free((void __percpu **)net->mib.ip_statistics);
	snmp_mib_free((void __percpu **)net->mib.tcp_statistics);
}

/* Per-namespace hooks that create and destroy the IPv4 MIBs. */
static __net_initdata struct pernet_operations ipv4_mib_ops = {
	.init	= ipv4_mib_init_net,
	.exit	= ipv4_mib_exit_net,
};

Linus Torvalds's avatar
Linus Torvalds committed
/* Register the IPv4 MIB per-namespace operations; returns the result. */
static int __init init_ipv4_mibs(void)
{
	return register_pernet_subsys(&ipv4_mib_ops);
}

/*
 * Per-namespace init: set up the local port range lock and give the range
 * its default bounds of 32768 and 61000.  Always returns 0.
 */
static __net_init int inet_init_net(struct net *net)
{
	/* Lock first, then the default lower/upper bound. */
	seqlock_init(&net->ipv4.ip_local_ports.lock);

	net->ipv4.ip_local_ports.range[0] = 32768;
	net->ipv4.ip_local_ports.range[1] = 61000;

	return 0;
}

/* Per-namespace exit hook for af_inet_ops; intentionally does nothing. */
static __net_exit void inet_exit_net(struct net *net)
{
}

/* Per-namespace hooks for the AF_INET defaults (see inet_init_net()). */
static __net_initdata struct pernet_operations af_inet_ops = {
	.init	= inet_init_net,
	.exit	= inet_exit_net,
};

/* Register af_inet_ops as a pernet subsystem; returns the result. */
static int __init init_inet_pernet_ops(void)
{
	int err = register_pernet_subsys(&af_inet_ops);

	return err;
}

Linus Torvalds's avatar
Linus Torvalds committed
static int ipv4_proc_init(void);

/* GSO/GRO callbacks for ETH_P_IP, registered by ipv4_offload_init(). */
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_send_check = inet_gso_send_check,
		.gso_segment = inet_gso_segment,
		.gro_receive = inet_gro_receive,
		.gro_complete = inet_gro_complete,
	},
};

/*
 * Offload entry registered for IPPROTO_IPIP by ipv4_offload_init(); it
 * reuses the IPv4 GSO callbacks and provides no GRO callbacks.
 */
static const struct net_offload ipip_offload = {
	.callbacks = {
		.gso_send_check	= inet_gso_send_check,
		.gso_segment	= inet_gso_segment,
	},
};

/*
 * Register the IPv4 offload handlers.  A failure to add the UDP or TCP
 * offload is logged with pr_crit() but does not abort; the function
 * always returns 0.
 */
static int __init ipv4_offload_init(void)
{
	/*
	 * Add offloads
	 */
	if (udpv4_offload_init() < 0)
		pr_crit("%s: Cannot add UDP protocol offload\n", __func__);
	if (tcpv4_offload_init() < 0)
		pr_crit("%s: Cannot add TCP protocol offload\n", __func__);

	dev_add_offload(&ip_packet_offload);
	inet_add_offload(&ipip_offload, IPPROTO_IPIP);
	return 0;
}

fs_initcall(ipv4_offload_init);

/* Packet type entry that directs ETH_P_IP frames to ip_rcv(). */
static struct packet_type ip_packet_type __read_mostly = {
	.type	= cpu_to_be16(ETH_P_IP),
	.func	= ip_rcv,
};

Linus Torvalds's avatar
Linus Torvalds committed
/*
 * Boot-time initialisation of the IPv4 stack.
 *
 * Returns 0 on success, -EINVAL when the reserved-ports bitmap cannot be
 * allocated, or the error from the failing proto_register() call (earlier
 * registrations are undone).  Failures after that point are only logged,
 * except for icmp_init(), which panics.
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));

	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
	if (!sysctl_local_reserved_ports)
		goto out;

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out_free_reserved_ports;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	rc = proto_register(&ping_prot, 1);
	if (rc)
		goto out_unregister_raw_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		pr_crit("%s: Cannot add ICMP protocol\n", __func__);
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		pr_crit("%s: Cannot add UDP protocol\n", __func__);
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	/* NOTE(review): upstream calls tcp_v4_init() at this point --
	 * confirm against the tree.
	 */

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

	if (init_inet_pernet_ops())
		pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		pr_crit("%s: Cannot init ipv4 mibs\n", __func__);

	ipv4_proc_init();

	ipfrag_init();

	/* Start receiving ETH_P_IP frames through ip_rcv(). */
	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;
out_unregister_raw_proto:
	proto_unregister(&raw_prot);
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
out_free_reserved_ports:
	kfree(sysctl_local_reserved_ports);
	goto out;
}

fs_initcall(inet_init);
Linus Torvalds's avatar
Linus Torvalds committed

/* ------------------------------------------------------------------------ */

#ifdef CONFIG_PROC_FS
/*
 * Create the IPv4 /proc entries (raw, tcp, udp, ping, misc) in that order.
 * If one step fails, the entries created before it are removed in reverse
 * order and -ENOMEM is returned; otherwise returns 0.
 */
static int __init ipv4_proc_init(void)
{
	int rc = 0;

	if (raw_proc_init())
		goto out_raw;
	if (tcp4_proc_init())
		goto out_tcp;
	if (udp4_proc_init())
		goto out_udp;
	if (ping_proc_init())
		goto out_ping;
	if (ip_misc_proc_init())
		goto out_misc;
out:
	return rc;
out_misc:
	ping_proc_exit();
out_ping:
	udp4_proc_exit();
out_udp:
	tcp4_proc_exit();
out_tcp:
	raw_proc_exit();
out_raw:
	rc = -ENOMEM;
	goto out;
}

#else /* CONFIG_PROC_FS */
/* Stub used when CONFIG_PROC_FS is off: nothing to create, report success. */
static int __init ipv4_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

MODULE_ALIAS_NETPROTO(PF_INET);