Skip to content
Snippets Groups Projects
socket.c 83.5 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    EXPORT_SYMBOL(vlan_ioctl_set);
    
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    /* Held while dlci_ioctl_hook is replaced and while it is called. */
    static DEFINE_MUTEX(dlci_ioctl_mutex);

    /* Handler for SIOCADDDLCI/SIOCDELDLCI; callers test it for NULL first. */
    static int (*dlci_ioctl_hook) (unsigned int, void __user *);

    /*
     *	dlci_ioctl_set - install the DLCI ioctl handler
     *	@hook: function taking (cmd, user argument pointer); stored as-is,
     *	       so passing NULL clears the handler.
     *
     *	The store is done under dlci_ioctl_mutex, the same mutex sock_ioctl()
     *	takes around its call through the hook.
     */
    void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
    {
    	mutex_lock(&dlci_ioctl_mutex);
    	dlci_ioctl_hook = hook;
    	mutex_unlock(&dlci_ioctl_mutex);
    }
    EXPORT_SYMBOL(dlci_ioctl_set);
    
    
    static long sock_do_ioctl(struct net *net, struct socket *sock,
    				 unsigned int cmd, unsigned long arg)
    {
    	int err;
    	void __user *argp = (void __user *)arg;
    
    	err = sock->ops->ioctl(sock, cmd, arg);
    
    	/*
    	 * If this ioctl is unknown try to hand it down
    	 * to the NIC driver.
    	 */
    	if (err == -ENOIOCTLCMD)
    		err = dev_ioctl(net, cmd, argp);
    
    	return err;
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	With an ioctl, arg may well be a user mode pointer, but we don't know
     *	what to do with it - that's up to the protocol still.
     */
    
    static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
    {
    	struct socket *sock;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	void __user *argp = (void __user *)arg;
    	int pid, err;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	sock = file->private_data;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
    
    		err = dev_ioctl(net, cmd, argp);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else
    
    Johannes Berg's avatar
    Johannes Berg committed
    #ifdef CONFIG_WEXT_CORE
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
    
    		err = dev_ioctl(net, cmd, argp);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else
    
    Johannes Berg's avatar
    Johannes Berg committed
    #endif
    
    		switch (cmd) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		case FIOSETOWN:
    		case SIOCSPGRP:
    			err = -EFAULT;
    			if (get_user(pid, (int __user *)argp))
    				break;
    			err = f_setown(sock->file, pid, 1);
    			break;
    		case FIOGETOWN:
    		case SIOCGPGRP:
    
    			err = put_user(f_getown(sock->file),
    
    				       (int __user *)argp);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			break;
    		case SIOCGIFBR:
    		case SIOCSIFBR:
    		case SIOCBRADDBR:
    		case SIOCBRDELBR:
    			err = -ENOPKG;
    			if (!br_ioctl_hook)
    				request_module("bridge");
    
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    			mutex_lock(&br_ioctl_mutex);
    
    			if (br_ioctl_hook)
    
    				err = br_ioctl_hook(net, cmd, argp);
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    			mutex_unlock(&br_ioctl_mutex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			break;
    		case SIOCGIFVLAN:
    		case SIOCSIFVLAN:
    			err = -ENOPKG;
    			if (!vlan_ioctl_hook)
    				request_module("8021q");
    
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    			mutex_lock(&vlan_ioctl_mutex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			if (vlan_ioctl_hook)
    
    				err = vlan_ioctl_hook(net, argp);
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    			mutex_unlock(&vlan_ioctl_mutex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			break;
    		case SIOCADDDLCI:
    		case SIOCDELDLCI:
    			err = -ENOPKG;
    			if (!dlci_ioctl_hook)
    				request_module("dlci");
    
    
    			mutex_lock(&dlci_ioctl_mutex);
    			if (dlci_ioctl_hook)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    				err = dlci_ioctl_hook(cmd, argp);
    
    			mutex_unlock(&dlci_ioctl_mutex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			break;
    		default:
    
    			err = sock_do_ioctl(net, sock, cmd, arg);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			break;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return err;
    }
    
    /*
     *	sock_create_lite - allocate a bare socket without calling a protocol
     *	@family, @type, @protocol: passed to the security hooks; only @type is
     *	                           stored in the socket.
     *	@res: always written: the new socket on success, NULL on any failure.
     *
     *	Both security hooks are called with 1 as their last argument.
     *	Returns 0 on success, -ENOMEM if sock_alloc() fails, or the error
     *	returned by a security hook.
     */
    int sock_create_lite(int family, int type, int protocol, struct socket **res)
    {
    	int err;
    	struct socket *sock = NULL;

    	err = security_socket_create(family, type, protocol, 1);
    	if (err)
    		goto out;

    	sock = sock_alloc();
    	if (!sock) {
    		err = -ENOMEM;
    		goto out;
    	}

    	sock->type = type;
    	err = security_socket_post_create(sock, family, type, protocol, 1);
    	if (err)
    		goto out_release;

    out:
    	*res = sock;
    	return err;
    out_release:
    	/* Drop the socket and make sure the caller sees NULL in *res. */
    	sock_release(sock);
    	sock = NULL;
    	goto out;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     *	No kernel lock held - perfect.
     *
     *	Returns the protocol's poll mask, OR-ed with POLL_BUSY_LOOP when
     *	sk_can_busy_loop() says the socket supports busy polling.
     */
    static unsigned int sock_poll(struct file *file, poll_table *wait)
    {
    	unsigned int busy_flag = 0;
    	struct socket *sock;

    	/*
    	 *      We can't return errors to poll, so it's either yes or no.
    	 */
    	sock = file->private_data;

    	if (sk_can_busy_loop(sock->sk)) {
    		/* this socket can poll_ll so tell the system call */
    		busy_flag = POLL_BUSY_LOOP;

    		/* once, only if requested by syscall */
    		if (wait && (wait->_key & POLL_BUSY_LOOP))
    			sk_busy_loop(sock->sk, 1);
    	}

    	return busy_flag | sock->ops->poll(file, sock, wait);
    }
    
    static int sock_mmap(struct file *file, struct vm_area_struct *vma)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct socket *sock = file->private_data;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	return sock->ops->mmap(file, sock, vma);
    }
    
    
    /*
     *	Release the socket embedded in @inode. @filp is unused here.
     *	Always returns 0.
     */
    static int sock_close(struct inode *inode, struct file *filp)
    {
    	sock_release(SOCKET_I(inode));
    	return 0;
    }
    
    /*
     *	Update the socket async list
     *
     *	Fasync_list locking strategy.
     *
     *	1. fasync_list is modified only under process context socket lock
     *	   i.e. under semaphore.
     *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
    
     *	   or under socket lock
    
    Linus Torvalds's avatar
    Linus Torvalds committed
     */
    
    static int sock_fasync(int fd, struct file *filp, int on)
    {
    
    	struct socket *sock = filp->private_data;
    	struct sock *sk = sock->sk;
    
    	struct socket_wq *wq;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (sk == NULL)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return -EINVAL;
    
    	lock_sock(sk);
    
    	wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk));
    	fasync_helper(fd, filp, on, &wq->fasync_list);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (!wq->fasync_list)
    
    		sock_reset_flag(sk, SOCK_FASYNC);
    	else
    
    		sock_set_flag(sk, SOCK_FASYNC);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	release_sock(sk);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return 0;
    }
    
    
    /* This function may be called only under socket lock or callback_lock or rcu_lock */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    int sock_wake_async(struct socket *sock, int how, int band)
    {
    
    	struct socket_wq *wq;
    
    	if (!sock)
    		return -1;
    	rcu_read_lock();
    	wq = rcu_dereference(sock->wq);
    	if (!wq || !wq->fasync_list) {
    		rcu_read_unlock();
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return -1;
    
    	switch (how) {
    
    	case SOCK_WAKE_WAITD:
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
    			break;
    		goto call_kill;
    
    	case SOCK_WAKE_SPACE:
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
    			break;
    		/* fall through */
    
    		kill_fasync(&wq->fasync_list, SIGIO, band);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		break;
    
    		kill_fasync(&wq->fasync_list, SIGURG, band);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return 0;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    int __sock_create(struct net *net, int family, int type, int protocol,
    
    			 struct socket **res, int kern)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	int err;
    	struct socket *sock;
    
    	const struct net_proto_family *pf;
    
    	 *      Check protocol is in range
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 */
    	if (family < 0 || family >= NPROTO)
    		return -EAFNOSUPPORT;
    	if (type < 0 || type >= SOCK_MAX)
    		return -EINVAL;
    
    	/* Compatibility.
    
    	   This uglymoron is moved from INET layer to here to avoid
    	   deadlock in module load.
    	 */
    	if (family == PF_INET && type == SOCK_PACKET) {
    
    		static int warned;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (!warned) {
    			warned = 1;
    
    			printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
    			       current->comm);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    		family = PF_PACKET;
    	}
    
    	err = security_socket_create(family, type, protocol, kern);
    	if (err)
    		return err;
    
    	/*
    	 *	Allocate the socket and allow the family to set things up. if
    	 *	the protocol is 0, the family is instructed to select an appropriate
    	 *	default.
    	 */
    	sock = sock_alloc();
    	if (!sock) {
    
    		net_warn_ratelimited("socket: no more sockets\n");
    
    		return -ENFILE;	/* Not exactly a match, but its the
    				   closest posix thing */
    	}
    
    	sock->type = type;
    
    
    	/* Attempt to load a protocol module if the find failed.
    	 *
    	 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 * requested real, full-featured networking support upon configuration.
    	 * Otherwise module support will break!
    	 */
    
    	if (rcu_access_pointer(net_families[family]) == NULL)
    
    		request_module("net-pf-%d", family);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #endif
    
    
    	rcu_read_lock();
    	pf = rcu_dereference(net_families[family]);
    	err = -EAFNOSUPPORT;
    	if (!pf)
    		goto out_release;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/*
    	 * We will call the ->create function, that possibly is in a loadable
    	 * module, so we have to bump that loadable module refcnt first.
    	 */
    
    	if (!try_module_get(pf->owner))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto out_release;
    
    
    	/* Now protected by module ref count */
    	rcu_read_unlock();
    
    
    	err = pf->create(net, sock, protocol, kern);
    
    	if (err < 0)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto out_module_put;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/*
    	 * Now to bump the refcnt of the [loadable] module that owns this
    	 * socket at sock_release time we decrement its refcnt.
    	 */
    
    	if (!try_module_get(sock->ops->owner))
    		goto out_module_busy;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/*
    	 * Now that we're done with the ->create function, the [loadable]
    	 * module can have its refcnt decremented
    	 */
    
    	module_put(pf->owner);
    
    	err = security_socket_post_create(sock, family, type, protocol, kern);
    	if (err)
    
    		goto out_sock_release;
    
    	*res = sock;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	return 0;
    
    out_module_busy:
    	err = -EAFNOSUPPORT;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    out_module_put:
    
    	sock->ops = NULL;
    	module_put(pf->owner);
    out_sock_release:
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	sock_release(sock);
    
    	return err;
    
    out_release:
    	rcu_read_unlock();
    	goto out_sock_release;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    EXPORT_SYMBOL(__sock_create);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     *	Create a socket in the calling task's network namespace
     *	(current->nsproxy->net_ns), with kern == 0.
     *	Returns whatever __sock_create() returns.
     */
    int sock_create(int family, int type, int protocol, struct socket **res)
    {
    	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     *	Create a socket in init_net, with kern == 1.
     *	Returns whatever __sock_create() returns.
     */
    int sock_create_kern(int family, int type, int protocol, struct socket **res)
    {
    	return __sock_create(&init_net, family, type, protocol, res, 1);
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     *	socket(2): create a socket and bind it to a new file descriptor.
     *
     *	The bits of @type outside SOCK_TYPE_MASK are flags; only SOCK_CLOEXEC
     *	and SOCK_NONBLOCK are accepted, anything else yields -EINVAL.
     *	Returns the value of sock_map_fd() on success, or a negative error
     *	from sock_create()/sock_map_fd().
     */
    SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
    {
    	int retval;
    	struct socket *sock;
    	int flags;

    	/* Check the SOCK_* constants for consistency.  */
    	BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
    	BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
    	BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
    	BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);

    	flags = type & ~SOCK_TYPE_MASK;
    	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
    		return -EINVAL;
    	type &= SOCK_TYPE_MASK;

    	/* Translate SOCK_NONBLOCK to O_NONBLOCK where the two values differ. */
    	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
    		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

    	retval = sock_create(family, type, protocol, &sock);
    	if (retval < 0)
    		goto out;

    	retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
    	if (retval < 0)
    		goto out_release;

    out:
    	/* It may be already another descriptor 8) Not kernel problem. */
    	return retval;

    out_release:
    	sock_release(sock);
    	return retval;
    }
    
    /*
     *	Create a pair of connected sockets.
     *
     *	Flag handling matches socket(2): only SOCK_CLOEXEC and SOCK_NONBLOCK
     *	may be set in @type outside SOCK_TYPE_MASK. The two descriptors are
     *	written to usockvec[0] and usockvec[1] before they are installed, so
     *	a faulting @usockvec leaves no descriptor behind.
     *
     *	Returns 0 on success or a negative error; on error every socket, file
     *	and descriptor obtained so far is released.
     */

    SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
    		int __user *, usockvec)
    {
    	struct socket *sock1, *sock2;
    	int fd1, fd2, err;
    	struct file *newfile1, *newfile2;
    	int flags;

    	flags = type & ~SOCK_TYPE_MASK;
    	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
    		return -EINVAL;
    	type &= SOCK_TYPE_MASK;

    	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
    		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

    	/*
    	 * Obtain the first socket and check if the underlying protocol
    	 * supports the socketpair call.
    	 */

    	err = sock_create(family, type, protocol, &sock1);
    	if (err < 0)
    		goto out;

    	err = sock_create(family, type, protocol, &sock2);
    	if (err < 0)
    		goto out_release_1;

    	err = sock1->ops->socketpair(sock1, sock2);
    	if (err < 0)
    		goto out_release_both;

    	fd1 = get_unused_fd_flags(flags);
    	if (unlikely(fd1 < 0)) {
    		err = fd1;
    		goto out_release_both;
    	}

    	fd2 = get_unused_fd_flags(flags);
    	if (unlikely(fd2 < 0)) {
    		err = fd2;
    		goto out_put_unused_1;
    	}

    	newfile1 = sock_alloc_file(sock1, flags, NULL);
    	if (unlikely(IS_ERR(newfile1))) {
    		err = PTR_ERR(newfile1);
    		goto out_put_unused_both;
    	}

    	newfile2 = sock_alloc_file(sock2, flags, NULL);
    	if (IS_ERR(newfile2)) {
    		err = PTR_ERR(newfile2);
    		goto out_fput_1;
    	}

    	err = put_user(fd1, &usockvec[0]);
    	if (err)
    		goto out_fput_both;

    	err = put_user(fd2, &usockvec[1]);
    	if (err)
    		goto out_fput_both;

    	audit_fd_pair(fd1, fd2);

    	fd_install(fd1, newfile1);
    	fd_install(fd2, newfile2);
    	/* fd1 and fd2 may be already another descriptors.
    	 * Not kernel problem.
    	 */

    	return 0;

    out_fput_both:
    	/* Both files exist: fput() them, no sock_release() on this path. */
    	fput(newfile2);
    	fput(newfile1);
    	put_unused_fd(fd2);
    	put_unused_fd(fd1);
    	goto out;

    out_fput_1:
    	/* Only newfile1 exists; sock2 is released directly. */
    	fput(newfile1);
    	put_unused_fd(fd2);
    	put_unused_fd(fd1);
    	sock_release(sock2);
    	goto out;

    out_put_unused_both:
    	put_unused_fd(fd2);
    out_put_unused_1:
    	put_unused_fd(fd1);
    out_release_both:
    	sock_release(sock2);
    out_release_1:
    	sock_release(sock1);
    out:
    	return err;
    }
    
    /*
     *	Bind a name to a socket. Nothing much to do here since it's
     *	the protocol's responsibility to handle the local address.
     *
     *	We move the socket address to kernel space before we call
     *	the protocol layer (having also checked the address is ok).
     *
     *	Returns the error from the descriptor lookup, the address copy, the
     *	security hook, or the protocol's ->bind(), whichever comes first.
     */

    SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
    {
    	struct socket *sock;
    	struct sockaddr_storage address;
    	int err, fput_needed;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (sock) {
    		err = move_addr_to_kernel(umyaddr, addrlen, &address);
    		if (err >= 0) {
    			err = security_socket_bind(sock,
    						   (struct sockaddr *)&address,
    						   addrlen);
    			if (!err)
    				err = sock->ops->bind(sock,
    						      (struct sockaddr *)
    						      &address, addrlen);
    		}
    		fput_light(sock->file, fput_needed);
    	}
    	return err;
    }
    
    /*
     *	Perform a listen. Basically, we allow the protocol to do anything
     *	necessary for a listen, and if that works, we mark the socket as
     *	ready for listening.
     *
     *	@backlog is compared as unsigned against the namespace's
     *	sysctl_somaxconn and clamped to it, so a negative backlog is clamped
     *	as well.
     */

    SYSCALL_DEFINE2(listen, int, fd, int, backlog)
    {
    	struct socket *sock;
    	int err, fput_needed;
    	int somaxconn;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (sock) {
    		somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
    		if ((unsigned int)backlog > somaxconn)
    			backlog = somaxconn;

    		err = security_socket_listen(sock, backlog);
    		if (!err)
    			err = sock->ops->listen(sock, backlog);

    		fput_light(sock->file, fput_needed);
    	}
    	return err;
    }
    
    /*
     *	For accept, we attempt to create a new socket, set up the link
     *	with the client, wake up the client, then return the new
     *	connected fd. We collect the address of the connector in kernel
     *	space and move it to user at the very end. This is unclean because
     *	we open the socket then return an error.
     *
     *	1003.1g adds the ability to recvmsg() to query connection pending
     *	status to recvmsg. We need to add that support in a way that's
     *	clean when we restructure accept also.
     *
     *	Only SOCK_CLOEXEC and SOCK_NONBLOCK are accepted in @flags.
     *	Returns the new descriptor on success or a negative error.
     */

    SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
    		int __user *, upeer_addrlen, int, flags)
    {
    	struct socket *sock, *newsock;
    	struct file *newfile;
    	int err, len, newfd, fput_needed;
    	struct sockaddr_storage address;

    	if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
    		return -EINVAL;

    	if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
    		flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		goto out;

    	err = -ENFILE;
    	newsock = sock_alloc();
    	if (!newsock)
    		goto out_put;

    	newsock->type = sock->type;
    	newsock->ops = sock->ops;

    	/*
    	 * We don't need try_module_get here, as the listening socket (sock)
    	 * has the protocol module (sock->ops->owner) held.
    	 */
    	__module_get(newsock->ops->owner);

    	newfd = get_unused_fd_flags(flags);
    	if (unlikely(newfd < 0)) {
    		err = newfd;
    		sock_release(newsock);
    		goto out_put;
    	}
    	newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name);
    	if (unlikely(IS_ERR(newfile))) {
    		err = PTR_ERR(newfile);
    		put_unused_fd(newfd);
    		sock_release(newsock);
    		goto out_put;
    	}

    	/* From here on, failures unwind through out_fd (fput + put_unused_fd). */
    	err = security_socket_accept(sock, newsock);
    	if (err)
    		goto out_fd;

    	err = sock->ops->accept(sock, newsock, sock->file->f_flags);
    	if (err < 0)
    		goto out_fd;

    	if (upeer_sockaddr) {
    		if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
    					  &len, 2) < 0) {
    			err = -ECONNABORTED;
    			goto out_fd;
    		}
    		err = move_addr_to_user(&address,
    					len, upeer_sockaddr, upeer_addrlen);
    		if (err < 0)
    			goto out_fd;
    	}

    	/* File flags are not inherited via accept() unlike another OSes. */

    	fd_install(newfd, newfile);
    	err = newfd;

    out_put:
    	fput_light(sock->file, fput_needed);
    out:
    	return err;
    out_fd:
    	fput(newfile);
    	put_unused_fd(newfd);
    	goto out_put;
    }
    
    
    /* accept(2): identical to accept4() with flags == 0. */
    SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
    		int __user *, upeer_addrlen)
    {
    	return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /*
     *	Attempt to connect to a socket with the server address.  The address
     *	is in user space so we verify it is OK and move it to kernel space.
     *
     *	For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
     *	break bindings
     *
     *	NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
     *	other SEQPACKET protocols that take time to connect() as it doesn't
     *	include the -EINPROGRESS status for such sockets.
     *
     *	The file's f_flags are passed to the protocol's ->connect().
     */

    SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
    		int, addrlen)
    {
    	struct socket *sock;
    	struct sockaddr_storage address;
    	int err, fput_needed;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		goto out;
    	err = move_addr_to_kernel(uservaddr, addrlen, &address);
    	if (err < 0)
    		goto out_put;

    	err =
    	    security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
    	if (err)
    		goto out_put;

    	err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
    				 sock->file->f_flags);
    out_put:
    	fput_light(sock->file, fput_needed);
    out:
    	return err;
    }
    
    /*
     *	Get the local address ('name') of a socket object. Move the obtained
     *	name to user space.
     *
     *	->getname() is called with 0 as its last argument (getpeername passes 1).
     */

    SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
    		int __user *, usockaddr_len)
    {
    	struct socket *sock;
    	struct sockaddr_storage address;
    	int len, err, fput_needed;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		goto out;

    	err = security_socket_getsockname(sock);
    	if (err)
    		goto out_put;

    	err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
    	if (err)
    		goto out_put;
    	err = move_addr_to_user(&address, len, usockaddr, usockaddr_len);

    out_put:
    	fput_light(sock->file, fput_needed);
    out:
    	return err;
    }
    
    /*
     *	Get the remote address ('name') of a socket object. Move the obtained
     *	name to user space.
     *
     *	->getname() is called with 1 as its last argument (getsockname passes 0).
     */

    SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
    		int __user *, usockaddr_len)
    {
    	struct socket *sock;
    	struct sockaddr_storage address;
    	int len, err, fput_needed;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (sock != NULL) {
    		err = security_socket_getpeername(sock);
    		if (err) {
    			fput_light(sock->file, fput_needed);
    			return err;
    		}

    		err =
    		    sock->ops->getname(sock, (struct sockaddr *)&address, &len,
    				       1);
    		if (!err)
    			err = move_addr_to_user(&address, len, usockaddr,
    						usockaddr_len);
    		fput_light(sock->file, fput_needed);
    	}
    	return err;
    }
    
    /*
     *	Send a datagram to a given address. We move the address into kernel
     *	space and check the user space data area is readable before invoking
     *	the protocol.
     *
     *	@len is capped at INT_MAX. When @addr is NULL the message is sent with
     *	no name. MSG_DONTWAIT is added when the file has O_NONBLOCK set.
     *	Returns the result of sock_sendmsg() or a negative error.
     */

    SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
    		unsigned int, flags, struct sockaddr __user *, addr,
    		int, addr_len)
    {
    	struct socket *sock;
    	struct sockaddr_storage address;
    	int err;
    	struct msghdr msg;
    	struct iovec iov;
    	int fput_needed;

    	if (len > INT_MAX)
    		len = INT_MAX;
    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		goto out;

    	iov.iov_base = buff;
    	iov.iov_len = len;
    	msg.msg_name = NULL;
    	msg.msg_iov = &iov;
    	msg.msg_iovlen = 1;
    	msg.msg_control = NULL;
    	msg.msg_controllen = 0;
    	msg.msg_namelen = 0;
    	if (addr) {
    		err = move_addr_to_kernel(addr, addr_len, &address);
    		if (err < 0)
    			goto out_put;
    		msg.msg_name = (struct sockaddr *)&address;
    		msg.msg_namelen = addr_len;
    	}
    	if (sock->file->f_flags & O_NONBLOCK)
    		flags |= MSG_DONTWAIT;
    	msg.msg_flags = flags;
    	err = sock_sendmsg(sock, &msg, len);

    out_put:
    	fput_light(sock->file, fput_needed);
    out:
    	return err;
    }
    
    /*
    
     *	Send a datagram down a socket.
    
    SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
    
    		unsigned int, flags)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	return sys_sendto(fd, buff, len, flags, NULL, 0);
    }
    
    /*
     *	Receive a frame from the socket and optionally record the address of the
     *	sender. We verify the buffers are writable and if needed move the
     *	sender address from kernel to user space.
     *
     *	@size is capped at INT_MAX. MSG_DONTWAIT is added when the file has
     *	O_NONBLOCK set. If the receive succeeds but copying the sender address
     *	to @addr fails, the copy error is returned instead of the byte count.
     */

    SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
    		unsigned int, flags, struct sockaddr __user *, addr,
    		int __user *, addr_len)
    {
    	struct socket *sock;
    	struct iovec iov;
    	struct msghdr msg;
    	struct sockaddr_storage address;
    	int err, err2;
    	int fput_needed;

    	if (size > INT_MAX)
    		size = INT_MAX;
    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		goto out;

    	msg.msg_control = NULL;
    	msg.msg_controllen = 0;
    	msg.msg_iovlen = 1;
    	msg.msg_iov = &iov;
    	iov.iov_len = size;
    	iov.iov_base = ubuf;
    	/* Save some cycles and don't copy the address if not needed */
    	msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
    	/* We assume all kernel code knows the size of sockaddr_storage */
    	msg.msg_namelen = 0;
    	if (sock->file->f_flags & O_NONBLOCK)
    		flags |= MSG_DONTWAIT;
    	err = sock_recvmsg(sock, &msg, size, flags);

    	if (err >= 0 && addr != NULL) {
    		err2 = move_addr_to_user(&address,
    					 msg.msg_namelen, addr, addr_len);
    		if (err2 < 0)
    			err = err2;
    	}

    	fput_light(sock->file, fput_needed);
    out:
    	return err;
    }
    
    /*
    
     *	Receive a datagram from a socket.
    
    asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
    
    			 unsigned int flags)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
    }
    
    /*
     *	Set a socket option. Because we don't know the option lengths we have
     *	to pass the user mode parameter for the protocols to sort out.
     *
     *	Returns -EINVAL for a negative @optlen.  Otherwise returns, in order
     *	of precedence: the error stored by sockfd_lookup_light() if the
     *	descriptor lookup fails, a non-zero result from
     *	security_socket_setsockopt(), or the result of the option handler.
     *	SOL_SOCKET options go to sock_setsockopt(); every other level goes
     *	to the socket's own ->setsockopt() operation.
     */
    SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
    		char __user *, optval, int, optlen)
    {
    	int err, fput_needed;
    	struct socket *sock;

    	if (optlen < 0)
    		return -EINVAL;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		return err;

    	err = security_socket_setsockopt(sock, level, optname);
    	if (err)
    		goto out_put;

    	if (level == SOL_SOCKET)
    		err = sock_setsockopt(sock, level, optname, optval, optlen);
    	else
    		err = sock->ops->setsockopt(sock, level, optname, optval,
    					    optlen);

    out_put:
    	/* Drop the reference taken by sockfd_lookup_light(). */
    	fput_light(sock->file, fput_needed);
    	return err;
    }
    
    /*
     *	Get a socket option. Because we don't know the option lengths we have
     *	to pass a user mode parameter for the protocols to sort out.
     *
     *	Returns, in order of precedence: the error stored by
     *	sockfd_lookup_light() if the descriptor lookup fails, a non-zero
     *	result from security_socket_getsockopt(), or the result of the
     *	option handler.  SOL_SOCKET options go to sock_getsockopt(); every
     *	other level goes to the socket's own ->getsockopt() operation.
     */
    SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
    		char __user *, optval, int __user *, optlen)
    {
    	int err, fput_needed;
    	struct socket *sock;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		return err;

    	err = security_socket_getsockopt(sock, level, optname);
    	if (err)
    		goto out_put;

    	if (level == SOL_SOCKET)
    		err = sock_getsockopt(sock, level, optname, optval, optlen);
    	else
    		err = sock->ops->getsockopt(sock, level, optname, optval,
    					    optlen);

    out_put:
    	/* Drop the reference taken by sockfd_lookup_light(). */
    	fput_light(sock->file, fput_needed);
    	return err;
    }
    
    /*
     *	Shutdown a socket.
     *
     *	Looks up the socket for @fd, asks security_socket_shutdown() for
     *	permission and, if that returns zero, calls the socket's own
     *	->shutdown() operation with @how.  Returns the error stored by
     *	sockfd_lookup_light() if the lookup fails, otherwise the result of
     *	the security hook or of ->shutdown().
     */
    SYSCALL_DEFINE2(shutdown, int, fd, int, how)
    {
    	int err, fput_needed;
    	struct socket *sock;

    	sock = sockfd_lookup_light(fd, &err, &fput_needed);
    	if (!sock)
    		return err;

    	err = security_socket_shutdown(sock, how);
    	if (!err)
    		err = sock->ops->shutdown(sock, how);

    	/* Drop the reference taken by sockfd_lookup_light(). */
    	fput_light(sock->file, fput_needed);
    	return err;
    }
    
    
    /* A couple of helpful macros for getting the address of the 32/64 bit
     * fields which are the same type (int / unsigned) on our platforms.
     *
     * COMPAT_MSG(msg, member) yields the address of @member in the object
     * named msg##_compat when MSG_CMSG_COMPAT is set in "flags", and the
     * address of the same member in msg otherwise.  The macro refers to
     * "flags" and to msg##_compat by name, so both must be in scope wherever
     * it is expanded.  COMPAT_NAMELEN() and COMPAT_FLAGS() are shorthands for
     * the msg_namelen and msg_flags members.
     */
    #define COMPAT_MSG(msg, member)	((MSG_CMSG_COMPAT & flags) ? &msg##_compat->member : &msg->member)
    #define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
    #define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
    
    
    /*
     * A socket address together with its length.  A pointer to this structure
     * is taken by ___sys_sendmsg().
     * NOTE(review): presumably used to remember the destination address of a
     * previous send so repeated work can be skipped - confirm against the
     * callers.
     */
    struct used_address {
    	struct sockaddr_storage name;	/* address bytes */
    	unsigned int name_len;		/* presumably the number of valid bytes in name - confirm */
    };
    
    
    /*
     * Copy a struct msghdr from user space into @kmsg.
     *
     * Returns -EFAULT if copy_from_user() reports a failure, 0 otherwise.
     * After a successful copy, a msg_namelen larger than
     * sizeof(struct sockaddr_storage) is clamped down to that size.
     *
     * NOTE(review): the clamp compares msg_namelen against a size_t; if
     * msg_namelen is a signed type, check how negative values should be
     * handled here.
     */
    static int copy_msghdr_from_user(struct msghdr *kmsg,
    				 struct msghdr __user *umsg)
    {
    	if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
    		return -EFAULT;
    	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
    		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
    	return 0;
    }
    
    static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
    
    			 struct msghdr *msg_sys, unsigned int flags,
    
    			 struct used_address *used_address)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct compat_msghdr __user *msg_compat =
    	    (struct compat_msghdr __user *)msg;