Newer
Older
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* PF_INET protocol family socket handler.
*
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Florian La Roche, <flla@stud.uni-sb.de>
* Alan Cox, <A.Cox@swansea.ac.uk>
*
* Changes (see also sock.c)
*
* piggy,
* Karl Knutson : Socket protocol table
* A.N.Kuznetsov : Socket death error in accept().
* John Richardson : Fix non blocking error in connect()
* so sockets that fail to connect
* don't return -EINPROGRESS.
* Alan Cox : Asynchronous I/O support
* Alan Cox : Keep correct socket pointer on sock
* structures
* when accept() ed
* Alan Cox : Semantics of SO_LINGER aren't state
* moved to close when you look carefully.
* With this fixed and the accept bug fixed
* some RPC stuff seems happier.
* Niibe Yutaka : 4.4BSD style write async I/O
* Alan Cox,
* Tony Gale : Fixed reuse semantics.
* Alan Cox : bind() shouldn't abort existing but dead
* sockets. Stops FTP netin:.. I hope.
* Alan Cox : bind() works correctly for RAW sockets.
* Note that FreeBSD at least was broken
* in this respect so be careful with
* compatibility tests...
* Alan Cox : routing cache support
* Alan Cox : memzero the socket structure for
* compactness.
* Matt Day : nonblock connect error handler
* Alan Cox : Allow large numbers of pending sockets
* (eg for big web sites), but only if
* specifically application requested.
* Alan Cox : New buffering throughout IP. Used
* dumbly.
* Alan Cox : New buffering now used smartly.
* Alan Cox : BSD rather than common sense
* interpretation of listen.
* Germano Caronni : Assorted small races.
* Alan Cox : sendmsg/recvmsg basic support.
* Alan Cox : Only sendmsg/recvmsg now supported.
* Alan Cox : Locked down bind (see security list).
* Alan Cox : Loosened bind a little.
* Mike McLagan : ADD/DEL DLCI Ioctls
* Willy Konynenberg : Transparent proxying support.
* David S. Miller : New socket lookup architecture.
* Some other random speedups.
* Cyrus Durgin : Cleaned up file for kmod hacks.
* Andi Kleen : Fix inet_stream_connect TCP race.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/netfilter_ipv4.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/inet.h>
#include <linux/igmp.h>

Arnaldo Carvalho de Melo
committed
#include <linux/inetdevice.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
#include <net/route.h>
#include <net/ip_fib.h>

Arnaldo Carvalho de Melo
committed
#include <net/inet_connection_sock.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/raw.h>
#include <net/icmp.h>
#include <net/ipip.h>
#include <net/inet_common.h>
#include <net/xfrm.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
/* The inetsw table contains everything that inet_create needs to
* build a new socket.
*/
static struct list_head inetsw[SOCK_MAX];
static DEFINE_SPINLOCK(inetsw_lock);
struct ipv4_config ipv4_config;
EXPORT_SYMBOL(ipv4_config);
/* New destruction routine */
void inet_sock_destruct(struct sock *sk)
{
struct inet_sock *inet = inet_sk(sk);
__skb_queue_purge(&sk->sk_receive_queue);
__skb_queue_purge(&sk->sk_error_queue);
if (sk->sk_type == SOCK_STREAM && sk->sk_state != TCP_CLOSE) {
pr_err("Attempt to release TCP socket in state %d %p\n",
sk->sk_state, sk);
return;
}
if (!sock_flag(sk, SOCK_DEAD)) {
pr_err("Attempt to release alive inet socket %p\n", sk);
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(atomic_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
WARN_ON(sk->sk_forward_alloc);
sk_refcnt_debug_dec(sk);
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
* The routines beyond this point handle the behaviour of an AF_INET
* socket object. Mostly it punts to the subprotocols of IP to do
* the work.
*/
/*
* Automatically bind an unbound socket.
*/
static int inet_autobind(struct sock *sk)
{
struct inet_sock *inet;
/* We may need to bind the socket. */
lock_sock(sk);
inet = inet_sk(sk);
if (!inet->num) {
if (sk->sk_prot->get_port(sk, 0)) {
release_sock(sk);
return -EAGAIN;
}
inet->sport = htons(inet->num);
}
release_sock(sk);
return 0;
}
/*
* Move a socket into listening state.
*/
int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
int err;
lock_sock(sk);
err = -EINVAL;
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
goto out;
old_state = sk->sk_state;
if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto out;
/* Really, if the socket is already in listen state
* we can only allow the backlog to be adjusted.
*/
if (old_state != TCP_LISTEN) {
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
}
sk->sk_max_ack_backlog = backlog;
err = 0;
out:
release_sock(sk);
return err;
}
u32 inet_ehash_secret __read_mostly;
EXPORT_SYMBOL(inet_ehash_secret);
/*
* inet_ehash_secret must be set exactly once
* Instead of using a dedicated spinlock, we (ab)use inetsw_lock
*/
void build_ehash_secret(void)
{
u32 rnd;
do {
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
spin_lock_bh(&inetsw_lock);
if (!inet_ehash_secret)
inet_ehash_secret = rnd;
spin_unlock_bh(&inetsw_lock);
}
EXPORT_SYMBOL(build_ehash_secret);
static inline int inet_netns_ok(struct net *net, int protocol)
{
int hash;
return 1;
hash = protocol & (MAX_INET_PROTOS - 1);
ipprot = rcu_dereference(inet_protos[hash]);
if (ipprot == NULL)
/* raw IP is OK */
return 1;
return ipprot->netns_ok;
}
static int inet_create(struct net *net, struct socket *sock, int protocol)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
char answer_no_check;
int try_loading_module = 0;
if (unlikely(!inet_ehash_secret))
if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
build_ehash_secret();
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
lookup_protocol:
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
break;
} else {
/* Check for the two wild cases. */
if (IPPROTO_IP == protocol) {
protocol = answer->protocol;
break;
}
if (IPPROTO_IP == answer->protocol)
break;
}
if (try_loading_module < 2) {
rcu_read_unlock();
/*
* Be more specific, e.g. net-pf-2-proto-132-type-1
* (net-pf-PF_INET-proto-IPPROTO_SCTP-type-SOCK_STREAM)
*/
if (++try_loading_module == 1)
request_module("net-pf-%d-proto-%d-type-%d",
PF_INET, protocol, sock->type);
/*
* Fall back to generic, e.g. net-pf-2-proto-132
* (net-pf-PF_INET-proto-IPPROTO_SCTP)
*/
else
request_module("net-pf-%d-proto-%d",
PF_INET, protocol);
goto lookup_protocol;
} else
goto out_rcu_unlock;
}
err = -EPERM;
if (answer->capability > 0 && !capable(answer->capability))
goto out_rcu_unlock;
err = -EAFNOSUPPORT;
if (!inet_netns_ok(net, protocol))
goto out_rcu_unlock;
sock->ops = answer->ops;
answer_prot = answer->prot;
answer_no_check = answer->no_check;
answer_flags = answer->flags;
rcu_read_unlock();
WARN_ON(answer_prot->slab == NULL);
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot);
if (sk == NULL)
goto out;
err = 0;
sk->sk_no_check = answer_no_check;
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = 1;
inet = inet_sk(sk);
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
if (SOCK_RAW == sock->type) {
inet->num = protocol;
if (IPPROTO_RAW == protocol)
inet->hdrincl = 1;
}
if (ipv4_config.no_pmtu_disc)
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
inet->id = 0;
sock_init_data(sock, sk);
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;
sk_refcnt_debug_inc(sk);
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
if (inet->num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
inet->sport = htons(inet->num);
/* Add to protocol hash chains. */
sk->sk_prot->hash(sk);
}
if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk);
if (err)
sk_common_release(sk);
}
out:
return err;
out_rcu_unlock:
rcu_read_unlock();
goto out;
}
/*
* The peer socket should always be NULL (or else). When we call this
* function we are destroying the object and from then on nobody
* should refer to it.
*/
int inet_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (sk) {
long timeout;
/* Applications forget to leave groups before exiting */
ip_mc_drop_socket(sk);
/* If linger is set, we don't return until the close
* is complete. Otherwise we return immediately. The
* actually closing is done the same either way.
*
* If the close is due to the process exiting, we never
* linger..
*/
timeout = 0;
if (sock_flag(sk, SOCK_LINGER) &&
!(current->flags & PF_EXITING))
timeout = sk->sk_lingertime;
sock->sk = NULL;
sk->sk_prot->close(sk, timeout);
}
return 0;
}
int sysctl_ip_nonlocal_bind __read_mostly;
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
unsigned short snum;
int chk_addr_ret;
int err;
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
}
err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
goto out;
Loading
Loading full blame...