    out:
    	sctp_release_sock(sk);
    	return err;
    }
    
    /*
     * This function is done by modeling the current datagram_poll() and the
     * tcp_poll().  Note that, based on these implementations, we don't
     * lock the socket in this function, even though it seems that,
 * ideally, locking or some other mechanisms can be used to ensure
 * the integrity of the counters (sndbuf and wmem_alloc) used
     * in this place.  We assume that we don't need locks either until proven
     * otherwise.
     *
     * Another thing to note is that we include the Async I/O support
     * here, again, by modeling the current TCP/UDP code.  We don't have
     * a good way to test with it yet.
     */
    unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
    {
    	struct sock *sk = sock->sk;
    	struct sctp_sock *sp = sctp_sk(sk);
    	unsigned int mask;
    
    	poll_wait(file, sk->sk_sleep, wait);
    
    	/* A TCP-style listening socket becomes readable when the accept queue
    	 * is not empty.
    	 */
    	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
		return (!list_empty(&sp->ep->asocs)) ?
			(POLLIN | POLLRDNORM) : 0;
    
    	mask = 0;
    
	/* Are there any exceptional events?  */
    	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
    		mask |= POLLERR;
    
    	if (sk->sk_shutdown & RCV_SHUTDOWN)
    		mask |= POLLRDHUP;
    
    	if (sk->sk_shutdown == SHUTDOWN_MASK)
    		mask |= POLLHUP;
    
    	/* Is it readable?  Reconsider this code with TCP-style support.  */
    	if (!skb_queue_empty(&sk->sk_receive_queue) ||
    	    (sk->sk_shutdown & RCV_SHUTDOWN))
    		mask |= POLLIN | POLLRDNORM;
    
    	/* The association is either gone or not ready.  */
    	if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
    		return mask;
    
    	/* Is it writable?  */
    	if (sctp_writeable(sk)) {
    		mask |= POLLOUT | POLLWRNORM;
    	} else {
    		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
    		/*
    		 * Since the socket is not locked, the buffer
    		 * might be made available after the writeable check and
    		 * before the bit is set.  This could cause a lost I/O
    		 * signal.  tcp_poll() has a race breaker for this race
    		 * condition.  Based on their implementation, we put
    		 * in the following code to cover it as well.
    		 */
    		if (sctp_writeable(sk))
    			mask |= POLLOUT | POLLWRNORM;
    	}
    	return mask;
    }
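
/* Illustrative userspace sketch (not part of this file): how the mask
 * computed by sctp_poll() above is typically consumed through poll(2).
 * The one-to-many socket setup and the 1-second timeout are assumptions
 * made only for this example; error handling is omitted.
 *
 *	#include <poll.h>
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *
 *	int sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
 *	struct pollfd pfd = { .fd = sd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, 1000) > 0) {
 *		if (pfd.revents & POLLIN)
 *			;	 a message or notification is ready for recvmsg()
 *		if (pfd.revents & POLLOUT)
 *			;	 sndbuf space is available (sctp_writeable())
 *		if (pfd.revents & (POLLERR | POLLHUP))
 *			;	 pending error, or both directions shut down
 *	}
 */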
    
    /********************************************************************
     * 2nd Level Abstractions
     ********************************************************************/
    
    static struct sctp_bind_bucket *sctp_bucket_create(
    	struct sctp_bind_hashbucket *head, unsigned short snum)
    {
	struct sctp_bind_bucket *pp;

	pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC);
	if (pp) {
		SCTP_DBG_OBJCNT_INC(bind_bucket);
		pp->port = snum;
		pp->fastreuse = 0;
		INIT_HLIST_HEAD(&pp->owner);
		hlist_add_head(&pp->node, &head->chain);
    	}
    	return pp;
    }
    
    /* Caller must hold hashbucket lock for this tb with local BH disabled */
    static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
{
	if (pp && hlist_empty(&pp->owner)) {
		__hlist_del(&pp->node);
    		kmem_cache_free(sctp_bucket_cachep, pp);
    		SCTP_DBG_OBJCNT_DEC(bind_bucket);
    	}
    }
    
    /* Release this socket's reference to a local port.  */
    static inline void __sctp_put_port(struct sock *sk)
    {
    	struct sctp_bind_hashbucket *head =
    		&sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)];
    	struct sctp_bind_bucket *pp;
    
    	sctp_spin_lock(&head->lock);
    	pp = sctp_sk(sk)->bind_hash;
    	__sk_del_bind_node(sk);
    	sctp_sk(sk)->bind_hash = NULL;
    	inet_sk(sk)->num = 0;
    	sctp_bucket_destroy(pp);
    	sctp_spin_unlock(&head->lock);
    }
    
    void sctp_put_port(struct sock *sk)
    {
    	sctp_local_bh_disable();
    	__sctp_put_port(sk);
    	sctp_local_bh_enable();
    }
    
    /*
 * The system picks an ephemeral port and chooses an address set equivalent
     * to binding with a wildcard address.
     * One of those addresses will be the primary address for the association.
     * This automatically enables the multihoming capability of SCTP.
     */
    static int sctp_autobind(struct sock *sk)
    {
    	union sctp_addr autoaddr;
	struct sctp_af *af;
	__be16 port;

    	/* Initialize a local sockaddr structure to INADDR_ANY. */
    	af = sctp_sk(sk)->pf->af;
    
    	port = htons(inet_sk(sk)->num);
    	af->inaddr_any(&autoaddr, port);
    
    	return sctp_do_bind(sk, &autoaddr, af->sockaddr_len);
    }
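
/* Illustrative userspace sketch (not part of this file): sctp_autobind()
 * above runs when an application connects or sends on a socket it never
 * bound explicitly.  The peer address and port below are assumptions for
 * the example only; error handling is omitted.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <arpa/inet.h>
 *
 *	int sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
 *	struct sockaddr_in peer;
 *
 *	memset(&peer, 0, sizeof(peer));
 *	peer.sin_family = AF_INET;
 *	peer.sin_port = htons(5000);
 *	inet_pton(AF_INET, "192.0.2.1", &peer.sin_addr);
 *
 *	 No bind() was issued, so the kernel picks an ephemeral port and
 *	 binds to the wildcard address (all local addresses) on our behalf.
 *	connect(sd, (struct sockaddr *)&peer, sizeof(peer));
 */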
    
    /* Parse out IPPROTO_SCTP CMSG headers.  Perform only minimal validation.
     *
     * From RFC 2292
     * 4.2 The cmsghdr Structure *
     *
     * When ancillary data is sent or received, any number of ancillary data
     * objects can be specified by the msg_control and msg_controllen members of
     * the msghdr structure, because each object is preceded by
     * a cmsghdr structure defining the object's length (the cmsg_len member).
     * Historically Berkeley-derived implementations have passed only one object
     * at a time, but this API allows multiple objects to be
     * passed in a single call to sendmsg() or recvmsg(). The following example
     * shows two ancillary data objects in a control buffer.
     *
 *   |<--------------------------- msg_controllen -------------------------->|
 *   |                                                                       |
 *   |<----- ancillary data object ----->|<----- ancillary data object ----->|
 *   |<---------- CMSG_SPACE() --------->|<---------- CMSG_SPACE() --------->|
 *   |                                   |                                   |
 *   |<---------- cmsg_len ---------->|  |<--------- cmsg_len ----------->|  |
 *   |<--------- CMSG_LEN() --------->|  |<-------- CMSG_LEN() ---------->|  |
 *   |                                |  |                                |  |
 *   +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+
 *   |cmsg_|cmsg_|cmsg_|XX|           |XX|cmsg_|cmsg_|cmsg_|XX|           |XX|
 *   |len  |level|type |XX|cmsg_data[]|XX|len  |level|type |XX|cmsg_data[]|XX|
 *   +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+
 *    ^
 *    |
 *
 * msg_control
 * points here
     */
    SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
    				  sctp_cmsgs_t *cmsgs)
    {
	struct cmsghdr *cmsg;
	struct msghdr *my_msg = (struct msghdr *)msg;

	for (cmsg = CMSG_FIRSTHDR(msg);
	     cmsg != NULL;
	     cmsg = CMSG_NXTHDR(my_msg, cmsg)) {
		if (!CMSG_OK(my_msg, cmsg))
    			return -EINVAL;
    
		/* Should we parse this header or ignore it?  */
    		if (cmsg->cmsg_level != IPPROTO_SCTP)
    			continue;
    
    		/* Strictly check lengths following example in SCM code.  */
    		switch (cmsg->cmsg_type) {
    		case SCTP_INIT:
    			/* SCTP Socket API Extension
    			 * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
    			 *
    			 * This cmsghdr structure provides information for
    			 * initializing new SCTP associations with sendmsg().
    			 * The SCTP_INITMSG socket option uses this same data
    			 * structure.  This structure is not used for
    			 * recvmsg().
    			 *
    			 * cmsg_level    cmsg_type      cmsg_data[]
    			 * ------------  ------------   ----------------------
    			 * IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
    			 */
    			if (cmsg->cmsg_len !=
    			    CMSG_LEN(sizeof(struct sctp_initmsg)))
    				return -EINVAL;
    			cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg);
    			break;
    
    		case SCTP_SNDRCV:
    			/* SCTP Socket API Extension
    			 * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV)
    			 *
    			 * This cmsghdr structure specifies SCTP options for
    			 * sendmsg() and describes SCTP header information
    			 * about a received message through recvmsg().
    			 *
    			 * cmsg_level    cmsg_type      cmsg_data[]
    			 * ------------  ------------   ----------------------
    			 * IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
    			 */
    			if (cmsg->cmsg_len !=
    			    CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
    				return -EINVAL;
    
    			cmsgs->info =
    				(struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
    
    			/* Minimally, validate the sinfo_flags. */
			if (cmsgs->info->sinfo_flags &
			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
			      SCTP_ABORT | SCTP_EOF))
    				return -EINVAL;
    			break;
    
    		default:
			return -EINVAL;
		}
	}
    	return 0;
    }
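
/* Illustrative userspace sketch (not part of this file): the kind of
 * control message sctp_msghdr_parse() above expects.  A single
 * SCTP_SNDRCV ancillary object selects the stream and flags for one
 * sendmsg() call.  The stream number, payload and the already-connected
 * descriptor sd are assumptions for the example; <netinet/sctp.h> comes
 * from lksctp-tools; error handling is omitted.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <netinet/sctp.h>
 *
 *	char payload[] = "hello";
 *	char cbuf[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))];
 *	struct iovec iov = { .iov_base = payload, .iov_len = sizeof(payload) };
 *	struct msghdr msg;
 *	struct cmsghdr *cmsg;
 *	struct sctp_sndrcvinfo *sinfo;
 *
 *	memset(&msg, 0, sizeof(msg));
 *	memset(cbuf, 0, sizeof(cbuf));
 *	msg.msg_iov = &iov;
 *	msg.msg_iovlen = 1;
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *
 *	cmsg = CMSG_FIRSTHDR(&msg);
 *	cmsg->cmsg_level = IPPROTO_SCTP;
 *	cmsg->cmsg_type = SCTP_SNDRCV;
 *	cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo));
 *
 *	sinfo = (struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
 *	sinfo->sinfo_stream = 1;
 *	sinfo->sinfo_flags = SCTP_UNORDERED;
 *
 *	sendmsg(sd, &msg, 0);	 sd: an already connected SCTP socket
 */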
    
    /*
 * Wait for a packet.
     * Note: This function is the same function as in core/datagram.c
     * with a few modifications to make lksctp work.
     */
    static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
    {
    	int error;
    	DEFINE_WAIT(wait);
    
    	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
    
    	/* Socket errors? */
    	error = sock_error(sk);
    	if (error)
    		goto out;
    
    	if (!skb_queue_empty(&sk->sk_receive_queue))
    		goto ready;
    
    	/* Socket shut down?  */
    	if (sk->sk_shutdown & RCV_SHUTDOWN)
    		goto out;
    
    	/* Sequenced packets can come disconnected.  If so we report the
    	 * problem.
    	 */
    	error = -ENOTCONN;
    
    	/* Is there a good reason to think that we may receive some data?  */
    	if (list_empty(&sctp_sk(sk)->ep->asocs) && !sctp_sstate(sk, LISTENING))
    		goto out;
    
    	/* Handle signals.  */
    	if (signal_pending(current))
    		goto interrupted;
    
	/* Let another process have a go, since we are going to sleep
	 * anyway.  Note: This may cause odd behaviors if the message
    	 * does not fit in the user's buffer, but this seems to be the
    	 * only way to honor MSG_DONTWAIT realistically.
    	 */
    	sctp_release_sock(sk);
    	*timeo_p = schedule_timeout(*timeo_p);
    	sctp_lock_sock(sk);
    
    ready:
    	finish_wait(sk->sk_sleep, &wait);
    	return 0;
    
    interrupted:
    	error = sock_intr_errno(*timeo_p);
    
    out:
    	finish_wait(sk->sk_sleep, &wait);
    	*err = error;
    	return error;
    }
    
    /* Receive a datagram.
     * Note: This is pretty much the same routine as in core/datagram.c
     * with a few changes to make lksctp work.
     */
    static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
    					      int noblock, int *err)
    {
    	int error;
    	struct sk_buff *skb;
    	long timeo;
    
    	timeo = sock_rcvtimeo(sk, noblock);
    
    	SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n",
    			  timeo, MAX_SCHEDULE_TIMEOUT);
    
    	do {
    		/* Again only user level code calls this function,
    		 * so nothing interrupt level
    		 * will suddenly eat the receive_queue.
    		 *
    		 *  Look at current nfs client by the way...
		 *  However, this function was correct in any case. 8)
    		 */
		if (flags & MSG_PEEK) {
			spin_lock_bh(&sk->sk_receive_queue.lock);
    			skb = skb_peek(&sk->sk_receive_queue);
    			if (skb)
				atomic_inc(&skb->users);
			spin_unlock_bh(&sk->sk_receive_queue.lock);
    		} else {
    			skb = skb_dequeue(&sk->sk_receive_queue);
    		}
    
    		if (skb)
    			return skb;
    
    
    		/* Caller is allowed not to check sk->sk_err before calling. */
    		error = sock_error(sk);
    		if (error)
			goto no_packet;

    		if (sk->sk_shutdown & RCV_SHUTDOWN)
    			break;
    
    		/* User doesn't want to wait.  */
    		error = -EAGAIN;
    		if (!timeo)
    			goto no_packet;
    	} while (sctp_wait_for_packet(sk, err, &timeo) == 0);
    
    	return NULL;
    
    no_packet:
    	*err = error;
    	return NULL;
    }
    
    /* If sndbuf has changed, wake up per association sndbuf waiters.  */
    static void __sctp_write_space(struct sctp_association *asoc)
    {
    	struct sock *sk = asoc->base.sk;
    	struct socket *sock = sk->sk_socket;
    
    	if ((sctp_wspace(asoc) > 0) && sock) {
    		if (waitqueue_active(&asoc->wait))
    			wake_up_interruptible(&asoc->wait);
    
    		if (sctp_writeable(sk)) {
    			if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
    				wake_up_interruptible(sk->sk_sleep);
    
    			/* Note that we try to include the Async I/O support
    			 * here by modeling from the current TCP/UDP code.
    			 * We have not tested with it yet.
    			 */
    			if (sock->fasync_list &&
			    !(sk->sk_shutdown & SEND_SHUTDOWN))
				sock_wake_async(sock,
						SOCK_WAKE_SPACE, POLL_OUT);
    		}
    	}
    }
    
    /* Do accounting for the sndbuf space.
     * Decrement the used sndbuf space of the corresponding association by the
 * data size which was just transmitted (freed).
     */
    static void sctp_wfree(struct sk_buff *skb)
    {
    	struct sctp_association *asoc;
    	struct sctp_chunk *chunk;
    	struct sock *sk;
    
    	/* Get the saved chunk pointer.  */
    	chunk = *((struct sctp_chunk **)(skb->cb));
    	asoc = chunk->asoc;
    	sk = asoc->base.sk;
    
    	asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk) +
    				sizeof(struct sk_buff) +
    				sizeof(struct sctp_chunk);
    
	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);

	/*
	 * This undoes what is done via sctp_set_owner_w and sk_mem_charge
	 */
	sk->sk_wmem_queued   -= skb->truesize;
	sk_mem_uncharge(sk, skb->truesize);

	sock_wfree(skb);
    	__sctp_write_space(asoc);
    
    	sctp_association_put(asoc);
    }
    
    
    /* Do accounting for the receive space on the socket.
     * Accounting for the association is done in ulpevent.c
     * We set this as a destructor for the cloned data skbs so that
     * accounting is done at the correct time.
     */
    void sctp_sock_rfree(struct sk_buff *skb)
    {
    	struct sock *sk = skb->sk;
    	struct sctp_ulpevent *event = sctp_skb2event(skb);
    
	atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);

	/*
	 * Mimic the behavior of sock_rfree
	 */
	sk_mem_uncharge(sk, event->rmem_len);
}

    /* Helper function to wait for space in the sndbuf.  */
    static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
    				size_t msg_len)
    {
    	struct sock *sk = asoc->base.sk;
    	int err = 0;
    	long current_timeo = *timeo_p;
    	DEFINE_WAIT(wait);
    
    	SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n",
    
    			  asoc, (long)(*timeo_p), msg_len);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* Increment the association's refcnt.  */
    	sctp_association_hold(asoc);
    
    	/* Wait on the association specific sndbuf space. */
    	for (;;) {
    		prepare_to_wait_exclusive(&asoc->wait, &wait,
    					  TASK_INTERRUPTIBLE);
    		if (!*timeo_p)
    			goto do_nonblock;
    		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
    		    asoc->base.dead)
    			goto do_error;
    		if (signal_pending(current))
    			goto do_interrupted;
    		if (msg_len <= sctp_wspace(asoc))
    			break;
    
		/* Let another process have a go, since we are going
		 * to sleep anyway.
    		 */
    		sctp_release_sock(sk);
		current_timeo = schedule_timeout(current_timeo);
		sctp_lock_sock(sk);
    
    		*timeo_p = current_timeo;
    	}
    
    out:
    	finish_wait(&asoc->wait, &wait);
    
    	/* Release the association's refcnt.  */
    	sctp_association_put(asoc);
    
    	return err;
    
    do_error:
    	err = -EPIPE;
    	goto out;
    
    do_interrupted:
    	err = sock_intr_errno(*timeo_p);
    	goto out;
    
    do_nonblock:
    	err = -EAGAIN;
    	goto out;
    }
    
    /* If socket sndbuf has changed, wake up all per association waiters.  */
    void sctp_write_space(struct sock *sk)
    {
    	struct sctp_association *asoc;
    
	/* Wake up the tasks in each wait queue.  */
	list_for_each_entry(asoc, &((sctp_sk(sk))->ep->asocs), asocs) {
    		__sctp_write_space(asoc);
    	}
    }
    
    /* Is there any sndbuf space available on the socket?
 *
 * Note that sk_wmem_alloc is the sum of the send buffers on all of the
     * associations on the same socket.  For a UDP-style socket with
     * multiple associations, it is possible for it to be "unwriteable"
     * prematurely.  I assume that this is acceptable because
     * a premature "unwriteable" is better than an accidental "writeable" which
     * would cause an unwanted block under certain circumstances.  For the 1-1
     * UDP-style sockets or TCP-style sockets, this code should work.
     *  - Daisy
     */
    static int sctp_writeable(struct sock *sk)
    {
	int amt = 0;

	amt = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
    	if (amt < 0)
    		amt = 0;
    	return amt;
    }
    
    /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
     * returns immediately with EINPROGRESS.
     */
    static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p)
    {
    	struct sock *sk = asoc->base.sk;
    	int err = 0;
    	long current_timeo = *timeo_p;
	DEFINE_WAIT(wait);

	SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __func__, asoc,
    			  (long)(*timeo_p));
    
    	/* Increment the association's refcnt.  */
    	sctp_association_hold(asoc);
    
    	for (;;) {
    		prepare_to_wait_exclusive(&asoc->wait, &wait,
    					  TASK_INTERRUPTIBLE);
    		if (!*timeo_p)
    			goto do_nonblock;
    		if (sk->sk_shutdown & RCV_SHUTDOWN)
    			break;
    		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
    		    asoc->base.dead)
    			goto do_error;
    		if (signal_pending(current))
    			goto do_interrupted;
    
    		if (sctp_state(asoc, ESTABLISHED))
    			break;
    
		/* Let another process have a go, since we are going
		 * to sleep anyway.
    		 */
    		sctp_release_sock(sk);
    		current_timeo = schedule_timeout(current_timeo);
    		sctp_lock_sock(sk);
    
    		*timeo_p = current_timeo;
    	}
    
    out:
    	finish_wait(&asoc->wait, &wait);
    
    	/* Release the association's refcnt.  */
    	sctp_association_put(asoc);
    
    	return err;
    
do_error:
	if (asoc->init_err_counter + 1 > asoc->max_init_attempts)
    		err = -ETIMEDOUT;
    	else
    		err = -ECONNREFUSED;
    	goto out;
    
    do_interrupted:
    	err = sock_intr_errno(*timeo_p);
    	goto out;
    
    do_nonblock:
    	err = -EINPROGRESS;
    	goto out;
    }
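
/* Illustrative userspace sketch (not part of this file): the -EINPROGRESS
 * path above is what a non-blocking connect() sees; the caller is then
 * expected to wait for writability (see sctp_poll()).  The O_NONBLOCK
 * setup, and sd/peer as in the earlier sketches, are assumptions for the
 * example; error handling is omitted.
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <poll.h>
 *	#include <sys/socket.h>
 *
 *	fcntl(sd, F_SETFL, fcntl(sd, F_GETFL, 0) | O_NONBLOCK);
 *
 *	if (connect(sd, (struct sockaddr *)&peer, sizeof(peer)) < 0 &&
 *	    errno == EINPROGRESS) {
 *		struct pollfd pfd = { .fd = sd, .events = POLLOUT };
 *
 *		poll(&pfd, 1, -1);	 writable once the association is up
 *	}
 */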
    
    static int sctp_wait_for_accept(struct sock *sk, long timeo)
    {
    	struct sctp_endpoint *ep;
    	int err = 0;
    	DEFINE_WAIT(wait);
    
    	ep = sctp_sk(sk)->ep;
    
    
    	for (;;) {
    		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
    					  TASK_INTERRUPTIBLE);
    
    		if (list_empty(&ep->asocs)) {
    			sctp_release_sock(sk);
    			timeo = schedule_timeout(timeo);
    			sctp_lock_sock(sk);
    		}
    
    		err = -EINVAL;
    		if (!sctp_sstate(sk, LISTENING))
    			break;
    
    		err = 0;
    		if (!list_empty(&ep->asocs))
    			break;
    
    		err = sock_intr_errno(timeo);
    		if (signal_pending(current))
    			break;
    
    		err = -EAGAIN;
    		if (!timeo)
    			break;
    	}
    
    	finish_wait(sk->sk_sleep, &wait);
    
    	return err;
    }
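
/* Illustrative userspace sketch (not part of this file): sctp_wait_for_accept()
 * above backs accept(2) on a TCP-style (SOCK_STREAM) SCTP listener.  The
 * port and backlog are assumptions for the example; error handling is
 * omitted.
 *
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *
 *	int lsd = socket(AF_INET, SOCK_STREAM, IPPROTO_SCTP);
 *	struct sockaddr_in addr;
 *
 *	memset(&addr, 0, sizeof(addr));
 *	addr.sin_family = AF_INET;
 *	addr.sin_port = htons(5000);
 *	addr.sin_addr.s_addr = htonl(INADDR_ANY);
 *
 *	bind(lsd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(lsd, 5);
 *
 *	 Blocks in sctp_wait_for_accept() until an association is queued.
 *	int csd = accept(lsd, NULL, NULL);
 */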
    
    
static void sctp_wait_for_close(struct sock *sk, long timeout)
    {
    	DEFINE_WAIT(wait);
    
    	do {
    		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
    		if (list_empty(&sctp_sk(sk)->ep->asocs))
    			break;
    		sctp_release_sock(sk);
    		timeout = schedule_timeout(timeout);
    		sctp_lock_sock(sk);
    	} while (!signal_pending(current) && timeout);
    
    	finish_wait(sk->sk_sleep, &wait);
    }
    
    
    static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
    {
    	struct sk_buff *frag;
    
    	if (!skb->data_len)
    		goto done;
    
	/* Don't forget the fragments. */
	skb_walk_frags(skb, frag)
		sctp_skb_set_owner_r_frag(frag, sk);
    
    done:
    	sctp_skb_set_owner_r(skb, sk);
    }
    
    
    void sctp_copy_sock(struct sock *newsk, struct sock *sk,
    		    struct sctp_association *asoc)
    {
    	struct inet_sock *inet = inet_sk(sk);
    	struct inet_sock *newinet = inet_sk(newsk);
    
    	newsk->sk_type = sk->sk_type;
    	newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
    	newsk->sk_flags = sk->sk_flags;
    	newsk->sk_no_check = sk->sk_no_check;
    	newsk->sk_reuse = sk->sk_reuse;
    
    	newsk->sk_shutdown = sk->sk_shutdown;
    	newsk->sk_destruct = inet_sock_destruct;
    	newsk->sk_family = sk->sk_family;
    	newsk->sk_protocol = IPPROTO_SCTP;
    	newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
    	newsk->sk_sndbuf = sk->sk_sndbuf;
    	newsk->sk_rcvbuf = sk->sk_rcvbuf;
    	newsk->sk_lingertime = sk->sk_lingertime;
    	newsk->sk_rcvtimeo = sk->sk_rcvtimeo;
    	newsk->sk_sndtimeo = sk->sk_sndtimeo;
    
    	newinet = inet_sk(newsk);
    
    	/* Initialize sk's sport, dport, rcv_saddr and daddr for
    	 * getsockname() and getpeername()
    	 */
    	newinet->sport = inet->sport;
    	newinet->saddr = inet->saddr;
    	newinet->rcv_saddr = inet->rcv_saddr;
    	newinet->dport = htons(asoc->peer.port);
    	newinet->pmtudisc = inet->pmtudisc;
    	newinet->id = asoc->next_tsn ^ jiffies;
    
    	newinet->uc_ttl = inet->uc_ttl;
    	newinet->mc_loop = 1;
    	newinet->mc_ttl = 1;
    	newinet->mc_index = 0;
    	newinet->mc_list = NULL;
    }
    
    
    /* Populate the fields of the newsk from the oldsk and migrate the assoc
     * and its messages to the newsk.
     */
    static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
    			      struct sctp_association *assoc,
    			      sctp_socket_type_t type)
    {
    	struct sctp_sock *oldsp = sctp_sk(oldsk);
    	struct sctp_sock *newsp = sctp_sk(newsk);
    	struct sctp_bind_bucket *pp; /* hash list port iterator */
    	struct sctp_endpoint *newep = newsp->ep;
    	struct sk_buff *skb, *tmp;
	struct sctp_ulpevent *event;
	struct sctp_bind_hashbucket *head;
    
    	/* Migrate socket buffer sizes and all the socket level options to the
    	 * new socket.
    	 */
    	newsk->sk_sndbuf = oldsk->sk_sndbuf;
    	newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
    	/* Brute force copy old sctp opt. */
    	inet_sk_copy_descendant(newsk, oldsk);
    
    	/* Restore the ep value that was overwritten with the above structure
    	 * copy.
    	 */
    	newsp->ep = newep;
    	newsp->hmac = NULL;
    
	/* Hook this new socket into the bind_hash list. */
	head = &sctp_port_hashtable[sctp_phashfn(inet_sk(oldsk)->num)];
	sctp_local_bh_disable();
	sctp_spin_lock(&head->lock);
    	pp = sctp_sk(oldsk)->bind_hash;
    	sk_add_bind_node(newsk, &pp->owner);
    	sctp_sk(newsk)->bind_hash = pp;
    	inet_sk(newsk)->num = inet_sk(oldsk)->num;
    
    	sctp_spin_unlock(&head->lock);
    	sctp_local_bh_enable();
    
    	/* Copy the bind_addr list from the original endpoint to the new
    	 * endpoint so that we can handle restarts properly
	 */
	sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
				&oldsp->ep->base.bind_addr, GFP_KERNEL);

    	/* Move any messages in the old socket's receive queue that are for the
    	 * peeled off association to the new socket's receive queue.
    	 */
    	sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) {
    		event = sctp_skb2event(skb);
		if (event->asoc == assoc) {
			__skb_unlink(skb, &oldsk->sk_receive_queue);
			__skb_queue_tail(&newsk->sk_receive_queue, skb);
			sctp_skb_set_owner_r_frag(skb, newsk);
    		}
    	}
    
    	/* Clean up any messages pending delivery due to partial
    	 * delivery.   Three cases:
	 * 1) No partial delivery; no work.
    	 * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby.
    	 * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue.
    	 */
	skb_queue_head_init(&newsp->pd_lobby);
	atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode);

	if (atomic_read(&sctp_sk(oldsk)->pd_mode)) {
    		struct sk_buff_head *queue;
    
    		/* Decide which queue to move pd_lobby skbs to. */
    		if (assoc->ulpq.pd_mode) {
    			queue = &newsp->pd_lobby;
    		} else
    			queue = &newsk->sk_receive_queue;
    
    		/* Walk through the pd_lobby, looking for skbs that
		 * need to be moved to the new socket.
    		 */
    		sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) {
    			event = sctp_skb2event(skb);
			if (event->asoc == assoc) {
				__skb_unlink(skb, &oldsp->pd_lobby);
				__skb_queue_tail(queue, skb);
				sctp_skb_set_owner_r_frag(skb, newsk);
    			}
    		}
    
    		/* Clear up any skbs waiting for the partial
    		 * delivery to finish.
    		 */
		if (assoc->ulpq.pd_mode)
			sctp_clear_pd(oldsk, NULL);
	}

	sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp)
		sctp_skb_set_owner_r_frag(skb, newsk);

	sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp)
		sctp_skb_set_owner_r_frag(skb, newsk);

    	/* Set the type of socket to indicate that it is peeled off from the
    	 * original UDP-style socket or created with the accept() call on a
    	 * TCP-style socket..
    	 */
	newsp->type = type;

	/* Mark the new socket "in-use" by the user so that any packets
	 * that may arrive on the association after we've moved it are
	 * queued to the backlog.  This prevents a potential race between
	 * backlog processing on the old socket and new-packet processing
	 * on the new socket.
	 *
	 * The caller has just allocated newsk so we can guarantee that other
	 * paths won't try to lock it and then oldsk.
	 */
	lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
    	sctp_assoc_migrate(assoc, newsk);
    
    	/* If the association on the newsk is already closed before accept()
    	 * is called, set RCV_SHUTDOWN flag.
    	 */
    	if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP))
    		newsk->sk_shutdown |= RCV_SHUTDOWN;
    
	newsk->sk_state = SCTP_SS_ESTABLISHED;
	sctp_release_sock(newsk);
}

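/* Illustrative userspace sketch (not part of this file): sctp_sock_migrate()
 * above runs when an association is peeled off a one-to-many socket (or
 * accepted on a TCP-style one).  The lksctp-tools helper and the sd/assoc_id
 * names shown here are assumptions about the application environment; error
 * handling is omitted.
 *
 *	#include <netinet/sctp.h>
 *
 *	 assoc_id was taken from a received struct sctp_sndrcvinfo
 *	 (sinfo_assoc_id) on the one-to-many socket sd.
 *	int peeled = sctp_peeloff(sd, assoc_id);
 *
 *	 peeled now behaves like a one-to-one socket for that association;
 *	 messages already queued for it were moved to the new socket's
 *	 receive queue by sctp_sock_migrate().
 */
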
    /* This proto struct describes the ULP interface for SCTP.  */
    struct proto sctp_prot = {
    	.name        =	"SCTP",
    	.owner       =	THIS_MODULE,
    	.close       =	sctp_close,
    	.connect     =	sctp_connect,
    	.disconnect  =	sctp_disconnect,
    	.accept      =	sctp_accept,
    	.ioctl       =	sctp_ioctl,
    	.init        =	sctp_init_sock,
    	.destroy     =	sctp_destroy_sock,
    	.shutdown    =	sctp_shutdown,
    	.setsockopt  =	sctp_setsockopt,
    	.getsockopt  =	sctp_getsockopt,
    	.sendmsg     =	sctp_sendmsg,
    	.recvmsg     =	sctp_recvmsg,
    	.bind        =	sctp_bind,
    	.backlog_rcv =	sctp_backlog_rcv,
    	.hash        =	sctp_hash,
    	.unhash      =	sctp_unhash,
    	.get_port    =	sctp_get_port,
	.obj_size    =  sizeof(struct sctp_sock),
	.sysctl_mem  =  sysctl_sctp_mem,
	.sysctl_rmem =  sysctl_sctp_rmem,
	.sysctl_wmem =  sysctl_sctp_wmem,
	.memory_pressure = &sctp_memory_pressure,
	.enter_memory_pressure = sctp_enter_memory_pressure,
	.memory_allocated = &sctp_memory_allocated,
	.sockets_allocated = &sctp_sockets_allocated,
    };
    
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
    struct proto sctpv6_prot = {
    	.name		= "SCTPv6",
    	.owner		= THIS_MODULE,
    	.close		= sctp_close,
    	.connect	= sctp_connect,
    	.disconnect	= sctp_disconnect,
    	.accept		= sctp_accept,
    	.ioctl		= sctp_ioctl,
    	.init		= sctp_init_sock,
    	.destroy	= sctp_destroy_sock,
    	.shutdown	= sctp_shutdown,
    	.setsockopt	= sctp_setsockopt,
    	.getsockopt	= sctp_getsockopt,
    	.sendmsg	= sctp_sendmsg,
    	.recvmsg	= sctp_recvmsg,
    	.bind		= sctp_bind,
    	.backlog_rcv	= sctp_backlog_rcv,
    	.hash		= sctp_hash,
    	.unhash		= sctp_unhash,
    	.get_port	= sctp_get_port,
	.obj_size	= sizeof(struct sctp6_sock),
	.sysctl_mem	= sysctl_sctp_mem,
	.sysctl_rmem	= sysctl_sctp_rmem,
	.sysctl_wmem	= sysctl_sctp_wmem,
	.memory_pressure = &sctp_memory_pressure,
	.enter_memory_pressure = sctp_enter_memory_pressure,
	.memory_allocated = &sctp_memory_allocated,
	.sockets_allocated = &sctp_sockets_allocated,
    };
    #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */