Newer
Older
/*
* NET An implementation of the SOCKET network access protocol.
*
* Version: @(#)socket.c 1.1.93 18/02/95
*
* Authors: Orest Zborowski, <obz@Kodak.COM>
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*
* Fixes:
* Anonymous : NOTSOCK/BADF cleanup. Error fix in
* shutdown()
* Alan Cox : verify_area() fixes
* Alan Cox : Removed DDI
* Jonathan Kamens : SOCK_DGRAM reconnect bug
* Alan Cox : Moved a load of checks to the very
* top level.
* Alan Cox : Move address structures to/from user
* mode above the protocol layers.
* Rob Janssen : Allow 0 length sends.
* Alan Cox : Asynchronous I/O support (cribbed from the
* tty drivers).
* Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
* Jeff Uphoff : Made max number of sockets command-line
* configurable.
* Matti Aarnio : Made the number of sockets dynamic,
* to be allocated when needed, and mr.
* Uphoff's max is used as max to be
* allowed to allocate.
* Linus : Argh. removed all the socket allocation
* altogether: it's in the inode now.
* Alan Cox : Made sock_alloc()/sock_release() public
* for NetROM and future kernel nfsd type
* stuff.
* Alan Cox : sendmsg/recvmsg basics.
* Tom Dyas : Export net symbols.
* Marcin Dalecki : Fixed problems with CONFIG_NET="n".
* Alan Cox : Added thread locking to sys_* calls
* for sockets. May have errors at the
* moment.
* Kevin Buhr : Fixed the dumb errors in the above.
* Andi Kleen : Some small cleanups, optimizations,
* and fixed a copy_from_user() bug.
* Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
* Tigran Aivazian : Made listen(2) backlog sanity checks
* protocol-independent
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*
* This module is effectively the top level interface to the BSD socket
*
* Based upon Swansea University Computer Society NET3.039
*/
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/if_frad.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
#include <linux/audit.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>

Masatake YAMATO
committed
#include <linux/xattr.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <net/compat.h>
#include <net/wext.h>
#include <net/sock.h>
#include <linux/netfilter.h>
#include <linux/if_tun.h>
#include <linux/ipv6_route.h>
#include <linux/route.h>
#include <linux/sockios.h>
#include <linux/atalk.h>
#include <net/busy_poll.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
unsigned int cmd, unsigned long arg);
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor.
*/
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl,
#endif
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
};
/*
* The protocol list. Each protocol is registered in here.
*/
static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/*
* Statistics counters of the socket lists
*/
static DEFINE_PER_CPU(int, sockets_in_use);
* Support routines.
* Move socket addresses back and forth across the kernel/user
* divide and look after the messy bits.
*/
/**
* move_addr_to_kernel - copy a socket address into kernel space
* @uaddr: Address in user space
* @kaddr: Address in kernel space
* @ulen: Length in user space
*
* The address is copied into kernel space. If the provided address is
* too long an error code of -EINVAL is returned. If the copy gives
* invalid addresses -EFAULT is returned. On a success 0 is returned.
*/
int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr)
if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
if (copy_from_user(kaddr, uaddr, ulen))
return audit_sockaddr(ulen, kaddr);
}
/**
* move_addr_to_user - copy an address to user space
* @kaddr: kernel space address
* @klen: length of address in kernel
* @uaddr: user space address
* @ulen: pointer to user length field
*
* The value pointed to by ulen on entry is the buffer length available.
* This is overwritten with the buffer space used. -EINVAL is returned
* if an overlong buffer is specified or a negative buffer size. -EFAULT
* is returned if either the buffer or the length field are not
* accessible.
* After copying the data up to the limit the user specifies, the true
* length of the data is written over the length limit the user
* specified. Zero is returned for a success.
*/
static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,

Hannes Frederic Sowa
committed
BUG_ON(klen > sizeof(struct sockaddr_storage));
err = get_user(len, ulen);
if (err)

Hannes Frederic Sowa
committed
if (len < 0)
if (audit_sockaddr(klen, kaddr))
return -ENOMEM;
if (copy_to_user(uaddr, kaddr, len))
* "fromlen shall refer to the value before truncation.."
* 1003.1g
static struct kmem_cache *sock_inode_cachep __read_mostly;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
wq = kmalloc(sizeof(*wq), GFP_KERNEL);
if (!wq) {
kmem_cache_free(sock_inode_cachep, ei);
return NULL;
}
init_waitqueue_head(&wq->wait);
wq->fasync_list = NULL;
RCU_INIT_POINTER(ei->socket.wq, wq);
ei->socket.state = SS_UNCONNECTED;
ei->socket.flags = 0;
ei->socket.ops = NULL;
ei->socket.sk = NULL;
ei->socket.file = NULL;
return &ei->vfs_inode;
}
static void sock_destroy_inode(struct inode *inode)
{
struct socket_alloc *ei;
ei = container_of(inode, struct socket_alloc, vfs_inode);
wq = rcu_dereference_protected(ei->socket.wq, 1);
kfree_rcu(wq, rcu);
kmem_cache_free(sock_inode_cachep, ei);
static void init_once(void *foo)
struct socket_alloc *ei = (struct socket_alloc *)foo;
static int init_inodecache(void)
{
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
sizeof(struct socket_alloc),
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD),
if (sock_inode_cachep == NULL)
return -ENOMEM;
return 0;
}
static const struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.destroy_inode = sock_destroy_inode,
.statfs = simple_statfs,
/*
* sockfs_dname() is called from d_path().
*/
static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
dentry->d_inode->i_ino);
}
static const struct dentry_operations sockfs_dentry_operations = {
.d_dname = sockfs_dname,
static struct dentry *sockfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return mount_pseudo(fs_type, "socket:", &sockfs_ops,
&sockfs_dentry_operations, SOCKFS_MAGIC);
}
static struct vfsmount *sock_mnt __read_mostly;
static struct file_system_type sock_fs_type = {
.name = "sockfs",
.mount = sockfs_mount,
.kill_sb = kill_anon_super,
};
/*
* Obtains the first available file descriptor and sets it up for use.
*
* These functions create file structures and maps them to fd space
* of the current process. On success it returns file descriptor
* and file struct implicitly stored in sock->file.
* Note that another thread may close file descriptor before we return
* from this function. We use the fact that now we do not refer
* to socket after mapping. If one day we will need it, this
* function will increment ref. count on file by 1.
*
* In any case returned fd MAY BE not valid!
* This race condition is unavoidable
* with shared fd spaces, we cannot solve it inside kernel,
* but we take care of internal coherence yet.
*/

Linus Torvalds
committed
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
struct qstr name = { .name = "" };

Masatake YAMATO
committed
if (dname) {
name.name = dname;
name.len = strlen(name.name);
} else if (sock->sk) {
name.name = sock->sk->sk_prot_creator->name;
name.len = strlen(name.name);
}
path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name);
if (unlikely(!path.dentry))
return ERR_PTR(-ENOMEM);
d_instantiate(path.dentry, SOCK_INODE(sock));
SOCK_INODE(sock)->i_fop = &socket_file_ops;
file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
&socket_file_ops);
if (unlikely(IS_ERR(file))) {
file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->private_data = sock;
EXPORT_SYMBOL(sock_alloc_file);
static int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = get_unused_fd_flags(flags);
if (unlikely(fd < 0))
return fd;

Linus Torvalds
committed
newfile = sock_alloc_file(sock, flags, NULL);
if (likely(!IS_ERR(newfile))) {
fd_install(fd, newfile);
put_unused_fd(fd);
return PTR_ERR(newfile);
struct socket *sock_from_file(struct file *file, int *err)
{
if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_map_fd */
EXPORT_SYMBOL(sock_from_file);
* sockfd_lookup - Go from a file number to its socket slot
* @fd: file handle
* @err: pointer to an error code return
*
* The file handle passed in is locked and the socket it is bound
* too is returned. If an error occurs the err pointer is overwritten
* with a negative errno code and NULL is returned. The function checks
* for both invalid handles and passing a handle which is not a socket.
*
* On a success the socket object pointer is returned.
*/
struct socket *sockfd_lookup(int fd, int *err)
{
struct file *file;
struct socket *sock;
sock = sock_from_file(file, err);
if (!sock)
EXPORT_SYMBOL(sockfd_lookup);
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
struct fd f = fdget(fd);
*err = -EBADF;
if (f.file) {
sock = sock_from_file(f.file, err);
if (likely(sock)) {
*fput_needed = f.flags;
}
fdput(f);

Masatake YAMATO
committed
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname"
#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX)
#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1)
static ssize_t sockfs_getxattr(struct dentry *dentry,
const char *name, void *value, size_t size)
{
const char *proto_name;
size_t proto_size;
int error;
error = -ENODATA;
if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) {
proto_name = dentry->d_name.name;
proto_size = strlen(proto_name);
if (value) {
error = -ERANGE;
if (proto_size + 1 > size)
goto out;
strncpy(value, proto_name, proto_size + 1);
}
error = proto_size + 1;
}
out:
return error;
}
static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
size_t size)
{
ssize_t len;
ssize_t used = 0;
len = security_inode_listsecurity(dentry->d_inode, buffer, size);
if (len < 0)
return len;
used += len;
if (buffer) {
if (size < used)
return -ERANGE;
buffer += len;
}
len = (XATTR_NAME_SOCKPROTONAME_LEN + 1);
used += len;
if (buffer) {
if (size < used)
return -ERANGE;
memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len);
buffer += len;
}
return used;
}
static const struct inode_operations sockfs_inode_ops = {
.getxattr = sockfs_getxattr,
.listxattr = sockfs_listxattr,
};
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
* NULL is returned.
*/
static struct socket *sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
inode = new_inode_pseudo(sock_mnt->mnt_sb);
if (!inode)
return NULL;
sock = SOCKET_I(inode);
kmemcheck_annotate_bitfield(sock, type);
inode->i_ino = get_next_ino();
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();

Masatake YAMATO
committed
inode->i_op = &sockfs_inode_ops;
this_cpu_add(sockets_in_use, 1);
return sock;
}
/*
* In theory you can't get an open on this inode, but /proc provides
* a back door. Remember to keep it shut otherwise you'll let the
* creepy crawlies in.
*/
static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
return -ENXIO;
}
const struct file_operations bad_sock_fops = {
};
/**
* sock_release - close a socket
* @sock: socket to close
*
* The socket is released from the protocol stack if it has a release
* callback, and the inode is then released if the socket is bound to
void sock_release(struct socket *sock)
{
if (sock->ops) {
struct module *owner = sock->ops->owner;
sock->ops->release(sock);
sock->ops = NULL;
module_put(owner);
}
if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
printk(KERN_ERR "sock_release: fasync list not empty!\n");
if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags))
return;
this_cpu_sub(sockets_in_use, 1);
if (!sock->file) {
iput(SOCK_INODE(sock));
return;
}
EXPORT_SYMBOL(sock_release);
void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
if (sock_flag(sk, SOCK_WIFI_STATUS))
*tx_flags |= SKBTX_WIFI_STATUS;
}
EXPORT_SYMBOL(sock_tx_timestamp);
static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
si->sock = sock;
si->scm = NULL;
si->msg = msg;
si->size = size;
return sock->ops->sendmsg(iocb, sock, msg, size);
}
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size)
{
int err = security_socket_sendmsg(sock, msg, size);
return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size);
}
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = __sock_sendmsg(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
EXPORT_SYMBOL(sock_sendmsg);
static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = __sock_sendmsg_nosec(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
mm_segment_t oldfs = get_fs();
int result;
set_fs(KERNEL_DS);
/*
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
msg->msg_iov = (struct iovec *)vec;
msg->msg_iovlen = num;
result = sock_sendmsg(sock, msg, size);
set_fs(oldfs);
return result;
}
EXPORT_SYMBOL(kernel_sendmsg);
/*
* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
*/
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
struct timespec ts[3];
int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
if (need_software_tstamp && skb->tstamp.tv64 == 0)
__net_timestamp(skb);
if (need_software_tstamp) {
if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
struct timeval tv;
skb_get_timestamp(skb, &tv);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
sizeof(tv), &tv);
} else {
skb_get_timestampns(skb, &ts[0]);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
sizeof(ts[0]), &ts[0]);
}
}
memset(ts, 0, sizeof(ts));
if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) &&
ktime_to_timespec_cond(skb->tstamp, ts + 0))
empty = 0;
if (shhwtstamps) {
if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1))
empty = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2))
if (!empty)
put_cmsg(msg, SOL_SOCKET,
SCM_TIMESTAMPING, sizeof(ts), &ts);
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
int ack;
if (!sock_flag(sk, SOCK_WIFI_STATUS))
return;
if (!skb->wifi_acked_valid)
return;
ack = skb->wifi_acked;
put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
}
EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
sizeof(__u32), &skb->dropcount);
}
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
sock_recv_timestamp(msg, sk, skb);
sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
si->sock = sock;
si->scm = NULL;
si->msg = msg;
si->size = size;
si->flags = flags;
return sock->ops->recvmsg(iocb, sock, msg, size, flags);
}
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
int err = security_socket_recvmsg(sock, msg, size, flags);
return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
}
int sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
iocb.private = &siocb;
ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
EXPORT_SYMBOL(sock_recvmsg);
static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
/**
* kernel_recvmsg - Receive a message from a socket (kernel space)
* @sock: The socket to receive the message from
* @msg: Received message
* @vec: Input s/g array for message data
* @num: Size of input s/g array
* @size: Number of bytes to read
* @flags: Message flags (MSG_DONTWAIT, etc...)
*
* On return the msg structure contains the scatter/gather array passed in the
* vec argument. The array is modified so that it consists of the unfilled
* portion of the original array.
*
* The returned value is the total number of bytes received, or an error.
*/
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
mm_segment_t oldfs = get_fs();
int result;
set_fs(KERNEL_DS);
/*
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
result = sock_recvmsg(sock, msg, size, flags);
set_fs(oldfs);
return result;
}
EXPORT_SYMBOL(kernel_recvmsg);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more)
sock = file->private_data;
flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
flags |= more;
return kernel_sendpage(sock, page, offset, size, flags);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct socket *sock = file->private_data;
if (unlikely(!sock->ops->splice_read))
return -EINVAL;
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
siocb->kiocb = iocb;
iocb->private = siocb;
return siocb;
static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
struct file *file, const struct iovec *iov,
unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
int i;
for (i = 0; i < nr_segs; i++)
size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;
if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
struct file *file, const struct iovec *iov,
unsigned long nr_segs)
struct socket *sock = file->private_data;
size_t size = 0;
int i;
for (i = 0; i < nr_segs; i++)
size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
if (sock->type == SOCK_SEQPACKET)
msg->msg_flags |= MSG_EOR;
return __sock_sendmsg(iocb, sock, msg, size);
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;
if (pos != 0)
return -ESPIPE;
x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}
/*
* Atomic setting of ioctl hooks to avoid race
* with module unload.
*/
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))