static struct sctp_bind_bucket *sctp_bucket_create(
	struct sctp_bind_hashbucket *head, unsigned short snum);
    
    static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
    {
    	struct sctp_bind_hashbucket *head; /* hash list */
    	struct sctp_bind_bucket *pp; /* hash list port iterator */
    	unsigned short snum;
    	int ret;
    
    	snum = ntohs(addr->v4.sin_port);
    
    	SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum);
    	sctp_local_bh_disable();
    
    	if (snum == 0) {
    		/* Search for an available port.
    		 *
    		 * 'sctp_port_rover' was the last port assigned, so
    		 * we start to search from 'sctp_port_rover +
    		 * 1'. What we do is first check if port 'rover' is
    		 * already in the hash table; if not, we use that; if
    		 * it is, we try next.
    		 */
    		int low = sysctl_local_port_range[0];
    		int high = sysctl_local_port_range[1];
    		int remaining = (high - low) + 1;
    		int rover;
    		int index;
    
    		sctp_spin_lock(&sctp_port_alloc_lock);
    		rover = sctp_port_rover;
    		do {
    			rover++;
    			if ((rover < low) || (rover > high))
    				rover = low;
    			index = sctp_phashfn(rover);
    			head = &sctp_port_hashtable[index];
    			sctp_spin_lock(&head->lock);
    			for (pp = head->chain; pp; pp = pp->next)
    				if (pp->port == rover)
    					goto next;
    			break;
    		next:
    			sctp_spin_unlock(&head->lock);
    		} while (--remaining > 0);
    		sctp_port_rover = rover;
    		sctp_spin_unlock(&sctp_port_alloc_lock);
    
    		/* Exhausted local port range during search? */
    		ret = 1;
    		if (remaining <= 0)
    			goto fail;
    
    		/* OK, here is the one we will use.  HEAD (the port
		 * hash table list entry) is non-NULL and we hold its
		 * lock.
    		 */
    		snum = rover;
    	} else {
		/* We are given a specific port number; we verify
		 * that it is not being used. If it is used, we will
		 * exhaust the search in the hash list corresponding
    		 * to the port number (snum) - we detect that with the
    		 * port iterator, pp being NULL.
    		 */
    		head = &sctp_port_hashtable[sctp_phashfn(snum)];
    		sctp_spin_lock(&head->lock);
    		for (pp = head->chain; pp; pp = pp->next) {
    			if (pp->port == snum)
    				goto pp_found;
    		}
    	}
    	pp = NULL;
    	goto pp_not_found;
    pp_found:
    	if (!hlist_empty(&pp->owner)) {
    		/* We had a port hash table hit - there is an
    		 * available port (pp != NULL) and it is being
		 * used by another socket (pp->owner not empty); that other
    		 * socket is going to be sk2.
    		 */
    		int reuse = sk->sk_reuse;
    		struct sock *sk2;
    		struct hlist_node *node;
    
    		SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
    
    		if (pp->fastreuse && sk->sk_reuse &&
    			sk->sk_state != SCTP_SS_LISTENING)
    			goto success;
    
    		/* Run through the list of sockets bound to the port
    		 * (pp->port) [via the pointers bind_next and
    		 * bind_pprev in the struct sock *sk2 (pp->sk)]. On each one,
    		 * we get the endpoint they describe and run through
    		 * the endpoint's list of IP (v4 or v6) addresses,
    		 * comparing each of the addresses with the address of
    		 * the socket sk. If we find a match, then that means
		 * that this port/socket (sk) combination is already
    		 * in an endpoint.
    		 */
    		sk_for_each_bound(sk2, node, &pp->owner) {
    			struct sctp_endpoint *ep2;
    			ep2 = sctp_sk(sk2)->ep;
    
    			if (reuse && sk2->sk_reuse &&
    			    sk2->sk_state != SCTP_SS_LISTENING)
    				continue;
    
    			if (sctp_bind_addr_match(&ep2->base.bind_addr, addr,
    						 sctp_sk(sk))) {
    				ret = (long)sk2;
    				goto fail_unlock;
    			}
    		}
    		SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n");
    	}
    pp_not_found:
    	/* If there was a hash table miss, create a new port.  */
    	ret = 1;
    	if (!pp && !(pp = sctp_bucket_create(head, snum)))
    		goto fail_unlock;
    
    	/* In either case (hit or miss), make sure fastreuse is 1 only
    	 * if sk->sk_reuse is too (that is, if the caller requested
    	 * SO_REUSEADDR on this socket -sk-).
    	 */
    
    	if (hlist_empty(&pp->owner)) {
    		if (sk->sk_reuse && sk->sk_state != SCTP_SS_LISTENING)
    			pp->fastreuse = 1;
    		else
    			pp->fastreuse = 0;
    	} else if (pp->fastreuse &&
    		(!sk->sk_reuse || sk->sk_state == SCTP_SS_LISTENING))
    		pp->fastreuse = 0;
    
    	/* We are set, so fill up all the data in the hash table
    	 * entry, tie the socket list information with the rest of the
    	 * sockets FIXME: Blurry, NPI (ipg).
    	 */
    success:
    	if (!sctp_sk(sk)->bind_hash) {
    		sk_add_bind_node(sk, &pp->owner);
    		sctp_sk(sk)->bind_hash = pp;
    	}
    	ret = 0;
    
    fail_unlock:
    	sctp_spin_unlock(&head->lock);
    
    fail:
    	sctp_local_bh_enable();
    	return ret;
    }
    
    /* Assign a 'snum' port to the socket.  If snum == 0, an ephemeral
     * port is requested.
     */
    static int sctp_get_port(struct sock *sk, unsigned short snum)
    {
    	long ret;
    	union sctp_addr addr;
    	struct sctp_af *af = sctp_sk(sk)->pf->af;
    
    	/* Set up a dummy address struct from the sk. */
    	af->from_sk(&addr, sk);
    	addr.v4.sin_port = htons(snum);
    
    	/* Note: sk->sk_num gets filled in if ephemeral port request. */
    	ret = sctp_get_port_local(sk, &addr);
    
    	return (ret ? 1 : 0);
    }
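
/* A minimal sketch (userspace view; illustrative, not part of this file):
 * a bind() with sin_port == 0 reaches sctp_get_port_local() above and
 * triggers the port-rover search, while a nonzero port takes the specific
 * port branch and can fail on a conflicting bind.  fd is hypothetical.
 *
 *	int fd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
 *	struct sockaddr_in a = {
 *		.sin_family = AF_INET,
 *		.sin_port   = 0,	(0 = ephemeral; kernel picks a port)
 *	};
 *	bind(fd, (struct sockaddr *)&a, sizeof(a));
 */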
    
    /*
     * 3.1.3 listen() - UDP Style Syntax
     *
     *   By default, new associations are not accepted for UDP style sockets.
     *   An application uses listen() to mark a socket as being able to
     *   accept new associations.
     */
    SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
    {
    	struct sctp_sock *sp = sctp_sk(sk);
    	struct sctp_endpoint *ep = sp->ep;
    
    	/* Only UDP style sockets that are not peeled off are allowed to
    	 * listen().
    	 */
    	if (!sctp_style(sk, UDP))
    		return -EINVAL;
    
    	/* If backlog is zero, disable listening. */
    	if (!backlog) {
    		if (sctp_sstate(sk, CLOSED))
    			return 0;
    
    		sctp_unhash_endpoint(ep);
    		sk->sk_state = SCTP_SS_CLOSED;
    	}
    
    	/* Return if we are already listening. */
    	if (sctp_sstate(sk, LISTENING))
    		return 0;
    
    	/*
    	 * If a bind() or sctp_bindx() is not called prior to a listen()
    	 * call that allows new associations to be accepted, the system
    	 * picks an ephemeral port and will choose an address set equivalent
    	 * to binding with a wildcard address.
    	 *
    	 * This is not currently spelled out in the SCTP sockets
    	 * extensions draft, but follows the practice as seen in TCP
    	 * sockets.
	 *
	 * Additionally, turn off the fastreuse flag since we are now
	 * listening.
	 */
    
    	sk->sk_state = SCTP_SS_LISTENING;
    
    	if (!ep->base.bind_addr.port) {
    		if (sctp_autobind(sk))
			return -EAGAIN;
	} else
		sctp_sk(sk)->bind_hash->fastreuse = 0;

    	sctp_hash_endpoint(ep);
    	return 0;
    }
    
    /*
     * 4.1.3 listen() - TCP Style Syntax
     *
 *   Applications use listen() to ready the SCTP endpoint for accepting
     *   inbound associations.
     */
    SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
    {
    	struct sctp_sock *sp = sctp_sk(sk);
    	struct sctp_endpoint *ep = sp->ep;
    
    	/* If backlog is zero, disable listening. */
    	if (!backlog) {
    		if (sctp_sstate(sk, CLOSED))
    			return 0;
    
    		sctp_unhash_endpoint(ep);
    		sk->sk_state = SCTP_SS_CLOSED;
    	}
    
    	if (sctp_sstate(sk, LISTENING))
    		return 0;
    
    	/*
    	 * If a bind() or sctp_bindx() is not called prior to a listen()
    	 * call that allows new associations to be accepted, the system
    	 * picks an ephemeral port and will choose an address set equivalent
    	 * to binding with a wildcard address.
    	 *
    	 * This is not currently spelled out in the SCTP sockets
    	 * extensions draft, but follows the practice as seen in TCP
    	 * sockets.
    	 */
    
    	sk->sk_state = SCTP_SS_LISTENING;
    
    	if (!ep->base.bind_addr.port) {
    		if (sctp_autobind(sk))
			return -EAGAIN;
	} else
		sctp_sk(sk)->bind_hash->fastreuse = 0;

    	sk->sk_max_ack_backlog = backlog;
    	sctp_hash_endpoint(ep);
    	return 0;
    }
    
    /*
     *  Move a socket to LISTENING state.
     */
    int sctp_inet_listen(struct socket *sock, int backlog)
    {
    	struct sock *sk = sock->sk;
	struct crypto_hash *tfm = NULL;
    	int err = -EINVAL;
    
    	if (unlikely(backlog < 0))
    		goto out;
    
    	sctp_lock_sock(sk);
    
    	if (sock->state != SS_UNCONNECTED)
    		goto out;
    
    	/* Allocate HMAC for generating cookie. */
    	if (sctp_hmac_alg) {
		tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
    		if (IS_ERR(tfm)) {
    			if (net_ratelimit()) {
    				printk(KERN_INFO
    				       "SCTP: failed to load transform for %s: %ld\n",
    					sctp_hmac_alg, PTR_ERR(tfm));
    			}
    			err = -ENOSYS;
    			goto out;
    		}
    	}
    
    	switch (sock->type) {
    	case SOCK_SEQPACKET:
    		err = sctp_seqpacket_listen(sk, backlog);
    		break;
    	case SOCK_STREAM:
    		err = sctp_stream_listen(sk, backlog);
    		break;
    	default:
    		break;
	}

    	if (err)
    		goto cleanup;
    
    	/* Store away the transform reference. */
    	sctp_sk(sk)->hmac = tfm;
    out:
    	sctp_release_sock(sk);
    	return err;
    cleanup:
	crypto_free_hash(tfm);
    	goto out;
    }
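
/* A minimal sketch (userspace view; illustrative, not part of this file):
 * both listen styles funnel through sctp_inet_listen() above.  For a
 * one-to-many (SOCK_SEQPACKET) socket a nonzero backlog only toggles
 * listening on; for a one-to-one (SOCK_STREAM) socket it also sets
 * sk_max_ack_backlog.  A backlog of 0 disables listening for either style.
 *
 *	listen(seqpacket_fd, 1);	(UDP-style: accept new associations)
 *	listen(stream_fd, 128);		(TCP-style: sizes the accept queue)
 *	listen(fd, 0);			(either style: stop listening)
 */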
    
    /*
 * This function is modeled on the current datagram_poll() and
 * tcp_poll().  Note that, based on these implementations, we don't
     * lock the socket in this function, even though it seems that,
     * ideally, locking or some other mechanisms can be used to ensure
 * the integrity of the counters (sndbuf and wmem_alloc) used
     * in this place.  We assume that we don't need locks either until proven
     * otherwise.
     *
     * Another thing to note is that we include the Async I/O support
     * here, again, by modeling the current TCP/UDP code.  We don't have
     * a good way to test with it yet.
     */
    unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
    {
    	struct sock *sk = sock->sk;
    	struct sctp_sock *sp = sctp_sk(sk);
    	unsigned int mask;
    
    	poll_wait(file, sk->sk_sleep, wait);
    
    	/* A TCP-style listening socket becomes readable when the accept queue
    	 * is not empty.
    	 */
    	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
    		return (!list_empty(&sp->ep->asocs)) ?
    
    			(POLLIN | POLLRDNORM) : 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	mask = 0;
    
	/* Are there any exceptional events?  */
    	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
    		mask |= POLLERR;
    
    	if (sk->sk_shutdown & RCV_SHUTDOWN)
    		mask |= POLLRDHUP;
    
    	if (sk->sk_shutdown == SHUTDOWN_MASK)
    		mask |= POLLHUP;
    
    	/* Is it readable?  Reconsider this code with TCP-style support.  */
    	if (!skb_queue_empty(&sk->sk_receive_queue) ||
    	    (sk->sk_shutdown & RCV_SHUTDOWN))
    		mask |= POLLIN | POLLRDNORM;
    
    	/* The association is either gone or not ready.  */
    	if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
    		return mask;
    
    	/* Is it writable?  */
    	if (sctp_writeable(sk)) {
    		mask |= POLLOUT | POLLWRNORM;
    	} else {
    		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
    		/*
    		 * Since the socket is not locked, the buffer
    		 * might be made available after the writeable check and
    		 * before the bit is set.  This could cause a lost I/O
    		 * signal.  tcp_poll() has a race breaker for this race
    		 * condition.  Based on their implementation, we put
    		 * in the following code to cover it as well.
    		 */
    		if (sctp_writeable(sk))
    			mask |= POLLOUT | POLLWRNORM;
    	}
    	return mask;
    }
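
/* A minimal sketch (userspace view; illustrative, not part of this file):
 * the mask computed by sctp_poll() above is what a poll() caller sees; a
 * TCP-style listening socket reports POLLIN once an association is ready
 * to be accept()ed.  listen_fd is hypothetical.
 *
 *	struct pollfd pfd = { .fd = listen_fd, .events = POLLIN };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *		new_fd = accept(listen_fd, NULL, NULL);
 */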
    
    /********************************************************************
     * 2nd Level Abstractions
     ********************************************************************/
    
    static struct sctp_bind_bucket *sctp_bucket_create(
    	struct sctp_bind_hashbucket *head, unsigned short snum)
    {
    	struct sctp_bind_bucket *pp;
    
	pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC);
    	SCTP_DBG_OBJCNT_INC(bind_bucket);
    	if (pp) {
    		pp->port = snum;
    		pp->fastreuse = 0;
    		INIT_HLIST_HEAD(&pp->owner);
    		if ((pp->next = head->chain) != NULL)
    			pp->next->pprev = &pp->next;
    		head->chain = pp;
    		pp->pprev = &head->chain;
    	}
    	return pp;
    }
    
    /* Caller must hold hashbucket lock for this tb with local BH disabled */
    static void sctp_bucket_destroy(struct sctp_bind_bucket *pp)
    {
	if (pp && hlist_empty(&pp->owner)) {
    		if (pp->next)
    			pp->next->pprev = pp->pprev;
    		*(pp->pprev) = pp->next;
    		kmem_cache_free(sctp_bucket_cachep, pp);
    		SCTP_DBG_OBJCNT_DEC(bind_bucket);
    	}
    }
    
    /* Release this socket's reference to a local port.  */
    static inline void __sctp_put_port(struct sock *sk)
    {
    	struct sctp_bind_hashbucket *head =
    		&sctp_port_hashtable[sctp_phashfn(inet_sk(sk)->num)];
    	struct sctp_bind_bucket *pp;
    
    	sctp_spin_lock(&head->lock);
    	pp = sctp_sk(sk)->bind_hash;
    	__sk_del_bind_node(sk);
    	sctp_sk(sk)->bind_hash = NULL;
    	inet_sk(sk)->num = 0;
    	sctp_bucket_destroy(pp);
    	sctp_spin_unlock(&head->lock);
    }
    
    void sctp_put_port(struct sock *sk)
    {
    	sctp_local_bh_disable();
    	__sctp_put_port(sk);
    	sctp_local_bh_enable();
    }
    
    /*
 * The system picks an ephemeral port and chooses an address set equivalent
     * to binding with a wildcard address.
     * One of those addresses will be the primary address for the association.
     * This automatically enables the multihoming capability of SCTP.
     */
    static int sctp_autobind(struct sock *sk)
    {
    	union sctp_addr autoaddr;
    	struct sctp_af *af;
	__be16 port;

    	/* Initialize a local sockaddr structure to INADDR_ANY. */
	af = sctp_sk(sk)->pf->af;
	port = htons(inet_sk(sk)->num);
    	af->inaddr_any(&autoaddr, port);
    
    	return sctp_do_bind(sk, &autoaddr, af->sockaddr_len);
    }
    
    /* Parse out IPPROTO_SCTP CMSG headers.  Perform only minimal validation.
     *
     * From RFC 2292
     * 4.2 The cmsghdr Structure *
     *
     * When ancillary data is sent or received, any number of ancillary data
     * objects can be specified by the msg_control and msg_controllen members of
     * the msghdr structure, because each object is preceded by
     * a cmsghdr structure defining the object's length (the cmsg_len member).
     * Historically Berkeley-derived implementations have passed only one object
     * at a time, but this API allows multiple objects to be
     * passed in a single call to sendmsg() or recvmsg(). The following example
     * shows two ancillary data objects in a control buffer.
     *
 *   |<--------------------------- msg_controllen -------------------------->|
 *   |                                                                       |
 *   |<----- ancillary data object ----->|<----- ancillary data object ----->|
 *   |<---------- CMSG_SPACE() --------->|<---------- CMSG_SPACE() --------->|
 *   |                                   |                                   |
 *   |<---------- cmsg_len ---------->|  |<--------- cmsg_len ----------->|  |
 *   |<--------- CMSG_LEN() --------->|  |<-------- CMSG_LEN() ---------->|  |
 *   |                                |  |                                |  |
 *   +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+
 *   |cmsg_|cmsg_|cmsg_|XX|           |XX|cmsg_|cmsg_|cmsg_|XX|           |XX|
 *   |len  |level|type |XX|cmsg_data[]|XX|len  |level|type |XX|cmsg_data[]|XX|
 *   +-----+-----+-----+--+-----------+--+-----+-----+-----+--+-----------+--+
 *    ^
 *    |
 *
 * msg_control
 * points here
 */
    SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *msg,
    				  sctp_cmsgs_t *cmsgs)
    {
    	struct cmsghdr *cmsg;
    
    	for (cmsg = CMSG_FIRSTHDR(msg);
    	     cmsg != NULL;
    	     cmsg = CMSG_NXTHDR((struct msghdr*)msg, cmsg)) {
    		if (!CMSG_OK(msg, cmsg))
    			return -EINVAL;
    
    		/* Should we parse this header or ignore?  */
    		if (cmsg->cmsg_level != IPPROTO_SCTP)
    			continue;
    
    		/* Strictly check lengths following example in SCM code.  */
    		switch (cmsg->cmsg_type) {
    		case SCTP_INIT:
    			/* SCTP Socket API Extension
    			 * 5.2.1 SCTP Initiation Structure (SCTP_INIT)
    			 *
    			 * This cmsghdr structure provides information for
    			 * initializing new SCTP associations with sendmsg().
    			 * The SCTP_INITMSG socket option uses this same data
    			 * structure.  This structure is not used for
    			 * recvmsg().
    			 *
    			 * cmsg_level    cmsg_type      cmsg_data[]
    			 * ------------  ------------   ----------------------
    			 * IPPROTO_SCTP  SCTP_INIT      struct sctp_initmsg
    			 */
    			if (cmsg->cmsg_len !=
    			    CMSG_LEN(sizeof(struct sctp_initmsg)))
    				return -EINVAL;
    			cmsgs->init = (struct sctp_initmsg *)CMSG_DATA(cmsg);
    			break;
    
    		case SCTP_SNDRCV:
    			/* SCTP Socket API Extension
    			 * 5.2.2 SCTP Header Information Structure(SCTP_SNDRCV)
    			 *
    			 * This cmsghdr structure specifies SCTP options for
    			 * sendmsg() and describes SCTP header information
    			 * about a received message through recvmsg().
    			 *
    			 * cmsg_level    cmsg_type      cmsg_data[]
    			 * ------------  ------------   ----------------------
    			 * IPPROTO_SCTP  SCTP_SNDRCV    struct sctp_sndrcvinfo
    			 */
    			if (cmsg->cmsg_len !=
    			    CMSG_LEN(sizeof(struct sctp_sndrcvinfo)))
    				return -EINVAL;
    
    			cmsgs->info =
    				(struct sctp_sndrcvinfo *)CMSG_DATA(cmsg);
    
    			/* Minimally, validate the sinfo_flags. */
			if (cmsgs->info->sinfo_flags &
			    ~(SCTP_UNORDERED | SCTP_ADDR_OVER |
			      SCTP_ABORT | SCTP_EOF))
				return -EINVAL;
    			break;
    
    		default:
    			return -EINVAL;
		}
    	}
    	return 0;
    }
    
    /*
 * Wait for a packet.
     * Note: This function is the same function as in core/datagram.c
     * with a few modifications to make lksctp work.
     */
    static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
    {
    	int error;
    	DEFINE_WAIT(wait);
    
    	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
    
    	/* Socket errors? */
    	error = sock_error(sk);
    	if (error)
    		goto out;
    
    	if (!skb_queue_empty(&sk->sk_receive_queue))
    		goto ready;
    
    	/* Socket shut down?  */
    	if (sk->sk_shutdown & RCV_SHUTDOWN)
    		goto out;
    
    	/* Sequenced packets can come disconnected.  If so we report the
    	 * problem.
    	 */
    	error = -ENOTCONN;
    
    	/* Is there a good reason to think that we may receive some data?  */
    	if (list_empty(&sctp_sk(sk)->ep->asocs) && !sctp_sstate(sk, LISTENING))
    		goto out;
    
    	/* Handle signals.  */
    	if (signal_pending(current))
    		goto interrupted;
    
	/* Let another process have a go, since we are going to sleep
	 * anyway.  Note: This may cause odd behaviors if the message
    	 * does not fit in the user's buffer, but this seems to be the
    	 * only way to honor MSG_DONTWAIT realistically.
    	 */
    	sctp_release_sock(sk);
    	*timeo_p = schedule_timeout(*timeo_p);
    	sctp_lock_sock(sk);
    
    ready:
    	finish_wait(sk->sk_sleep, &wait);
    	return 0;
    
    interrupted:
    	error = sock_intr_errno(*timeo_p);
    
    out:
    	finish_wait(sk->sk_sleep, &wait);
    	*err = error;
    	return error;
    }
    
    /* Receive a datagram.
     * Note: This is pretty much the same routine as in core/datagram.c
     * with a few changes to make lksctp work.
     */
    static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags,
    					      int noblock, int *err)
    {
    	int error;
    	struct sk_buff *skb;
    	long timeo;
    
    	timeo = sock_rcvtimeo(sk, noblock);
    
    	SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n",
    			  timeo, MAX_SCHEDULE_TIMEOUT);
    
    	do {
    		/* Again only user level code calls this function,
    		 * so nothing interrupt level
    		 * will suddenly eat the receive_queue.
    		 *
    		 *  Look at current nfs client by the way...
		 *  However, this function was correct in any case. 8)
    		 */
    		if (flags & MSG_PEEK) {
			spin_lock_bh(&sk->sk_receive_queue.lock);
    			skb = skb_peek(&sk->sk_receive_queue);
    			if (skb)
    				atomic_inc(&skb->users);
			spin_unlock_bh(&sk->sk_receive_queue.lock);
    		} else {
    			skb = skb_dequeue(&sk->sk_receive_queue);
    		}
    
    		if (skb)
    			return skb;

		/* Caller is allowed not to check sk->sk_err before calling. */
		error = sock_error(sk);
		if (error)
			goto no_packet;

    		if (sk->sk_shutdown & RCV_SHUTDOWN)
    			break;
    
    		/* User doesn't want to wait.  */
    		error = -EAGAIN;
    		if (!timeo)
    			goto no_packet;
    	} while (sctp_wait_for_packet(sk, err, &timeo) == 0);
    
    	return NULL;
    
    no_packet:
    	*err = error;
    	return NULL;
    }
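
/* A minimal sketch (userspace view; illustrative, not part of this file):
 * the flags handled above map directly onto recvmsg() usage; MSG_PEEK takes
 * the skb_peek()+refcount path and leaves the datagram queued, while
 * MSG_DONTWAIT makes the -EAGAIN path fire instead of sleeping.
 *
 *	n = recvmsg(fd, &msg, MSG_PEEK);	(datagram stays queued)
 *	n = recvmsg(fd, &msg, MSG_DONTWAIT);	(fails with EAGAIN if empty)
 */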
    
    /* If sndbuf has changed, wake up per association sndbuf waiters.  */
    static void __sctp_write_space(struct sctp_association *asoc)
    {
    	struct sock *sk = asoc->base.sk;
    	struct socket *sock = sk->sk_socket;
    
    	if ((sctp_wspace(asoc) > 0) && sock) {
    		if (waitqueue_active(&asoc->wait))
    			wake_up_interruptible(&asoc->wait);
    
    		if (sctp_writeable(sk)) {
    			if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
    				wake_up_interruptible(sk->sk_sleep);
    
    			/* Note that we try to include the Async I/O support
    			 * here by modeling from the current TCP/UDP code.
    			 * We have not tested with it yet.
    			 */
    			if (sock->fasync_list &&
    			    !(sk->sk_shutdown & SEND_SHUTDOWN))
    				sock_wake_async(sock, 2, POLL_OUT);
    		}
    	}
    }
    
    /* Do accounting for the sndbuf space.
     * Decrement the used sndbuf space of the corresponding association by the
     * data size which was just transmitted(freed).
     */
    static void sctp_wfree(struct sk_buff *skb)
    {
    	struct sctp_association *asoc;
    	struct sctp_chunk *chunk;
    	struct sock *sk;
    
    	/* Get the saved chunk pointer.  */
    	chunk = *((struct sctp_chunk **)(skb->cb));
    	asoc = chunk->asoc;
    	sk = asoc->base.sk;
    
    	asoc->sndbuf_used -= SCTP_DATA_SNDSIZE(chunk) +
    				sizeof(struct sk_buff) +
    				sizeof(struct sctp_chunk);
    
    	atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
    
    	sock_wfree(skb);
    	__sctp_write_space(asoc);
    
    	sctp_association_put(asoc);
    }
    
    
    /* Do accounting for the receive space on the socket.
     * Accounting for the association is done in ulpevent.c
     * We set this as a destructor for the cloned data skbs so that
     * accounting is done at the correct time.
     */
    void sctp_sock_rfree(struct sk_buff *skb)
    {
    	struct sock *sk = skb->sk;
    	struct sctp_ulpevent *event = sctp_skb2event(skb);
    
    	atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
    }
    
    /* Helper function to wait for space in the sndbuf.  */
    static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
    				size_t msg_len)
    {
    	struct sock *sk = asoc->base.sk;
    	int err = 0;
    	long current_timeo = *timeo_p;
    	DEFINE_WAIT(wait);
    
	SCTP_DEBUG_PRINTK("wait_for_sndbuf: asoc=%p, timeo=%ld, msg_len=%zu\n",
			  asoc, (long)(*timeo_p), msg_len);

    	/* Increment the association's refcnt.  */
    	sctp_association_hold(asoc);
    
    	/* Wait on the association specific sndbuf space. */
    	for (;;) {
    		prepare_to_wait_exclusive(&asoc->wait, &wait,
    					  TASK_INTERRUPTIBLE);
    		if (!*timeo_p)
    			goto do_nonblock;
    		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
    		    asoc->base.dead)
    			goto do_error;
    		if (signal_pending(current))
    			goto do_interrupted;
    		if (msg_len <= sctp_wspace(asoc))
    			break;
    
		/* Let another process have a go, since we are going
		 * to sleep anyway.
		 */
    		sctp_release_sock(sk);
    		current_timeo = schedule_timeout(current_timeo);
		sctp_lock_sock(sk);

    		*timeo_p = current_timeo;
    	}
    
    out:
    	finish_wait(&asoc->wait, &wait);
    
    	/* Release the association's refcnt.  */
    	sctp_association_put(asoc);
    
    	return err;
    
    do_error:
    	err = -EPIPE;
    	goto out;
    
    do_interrupted:
    	err = sock_intr_errno(*timeo_p);
    	goto out;
    
    do_nonblock:
    	err = -EAGAIN;
    	goto out;
    }
    
    /* If socket sndbuf has changed, wake up all per association waiters.  */
    void sctp_write_space(struct sock *sk)
    {
    	struct sctp_association *asoc;
    	struct list_head *pos;
    
    	/* Wake up the tasks in each wait queue.  */
    	list_for_each(pos, &((sctp_sk(sk))->ep->asocs)) {
    		asoc = list_entry(pos, struct sctp_association, asocs);
    		__sctp_write_space(asoc);
    	}
    }
    
    /* Is there any sndbuf space available on the socket?
     *
 * Note that sk_wmem_alloc is the sum of the send buffers on all of the
     * associations on the same socket.  For a UDP-style socket with
     * multiple associations, it is possible for it to be "unwriteable"
     * prematurely.  I assume that this is acceptable because
     * a premature "unwriteable" is better than an accidental "writeable" which
     * would cause an unwanted block under certain circumstances.  For the 1-1
     * UDP-style sockets or TCP-style sockets, this code should work.
     *  - Daisy
     */
    static int sctp_writeable(struct sock *sk)
    {
    	int amt = 0;
    
	amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
    	if (amt < 0)
    		amt = 0;
    	return amt;
    }
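
/* Example (assumed numbers, illustrative only): with sk_sndbuf = 65536 and
 * sk_wmem_alloc totaling 61440 across all of the socket's associations,
 * sctp_writeable() returns 4096; once sk_wmem_alloc reaches sk_sndbuf it
 * returns 0 and sctp_poll() above stops reporting POLLOUT.
 */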
    
    /* Wait for an association to go into ESTABLISHED state. If timeout is 0,
     * returns immediately with EINPROGRESS.
     */
    static int sctp_wait_for_connect(struct sctp_association *asoc, long *timeo_p)
    {
    	struct sock *sk = asoc->base.sk;
    	int err = 0;
    	long current_timeo = *timeo_p;
    	DEFINE_WAIT(wait);
    
    	SCTP_DEBUG_PRINTK("%s: asoc=%p, timeo=%ld\n", __FUNCTION__, asoc,
    			  (long)(*timeo_p));
    
    	/* Increment the association's refcnt.  */
    	sctp_association_hold(asoc);
    
    	for (;;) {
    		prepare_to_wait_exclusive(&asoc->wait, &wait,
    					  TASK_INTERRUPTIBLE);
    		if (!*timeo_p)
    			goto do_nonblock;
    		if (sk->sk_shutdown & RCV_SHUTDOWN)
    			break;
    		if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING ||
    		    asoc->base.dead)
    			goto do_error;
    		if (signal_pending(current))
    			goto do_interrupted;
    
    		if (sctp_state(asoc, ESTABLISHED))
    			break;
    
		/* Let another process have a go, since we are going
		 * to sleep anyway.
		 */
    		sctp_release_sock(sk);
    		current_timeo = schedule_timeout(current_timeo);
    		sctp_lock_sock(sk);
    
    		*timeo_p = current_timeo;
    	}
    
    out:
    	finish_wait(&asoc->wait, &wait);
    
    	/* Release the association's refcnt.  */
    	sctp_association_put(asoc);
    
    	return err;
    
    do_error:
	if (asoc->init_err_counter + 1 > asoc->max_init_attempts)
    		err = -ETIMEDOUT;
    	else
    		err = -ECONNREFUSED;
    	goto out;
    
    do_interrupted:
    	err = sock_intr_errno(*timeo_p);
    	goto out;
    
    do_nonblock:
    	err = -EINPROGRESS;
    	goto out;
    }
    
    static int sctp_wait_for_accept(struct sock *sk, long timeo)
    {
    	struct sctp_endpoint *ep;
    	int err = 0;
    	DEFINE_WAIT(wait);
    
    	ep = sctp_sk(sk)->ep;
    
    	for (;;) {
    		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
    					  TASK_INTERRUPTIBLE);
    
    		if (list_empty(&ep->asocs)) {
    			sctp_release_sock(sk);
    			timeo = schedule_timeout(timeo);
    			sctp_lock_sock(sk);
    		}
    
    		err = -EINVAL;
    		if (!sctp_sstate(sk, LISTENING))
    			break;
    
    		err = 0;
    		if (!list_empty(&ep->asocs))
    			break;
    
    		err = sock_intr_errno(timeo);
    		if (signal_pending(current))
    			break;
    
    		err = -EAGAIN;
    		if (!timeo)
    			break;
    	}
    
    	finish_wait(sk->sk_sleep, &wait);
    
    	return err;
    }
    
    void sctp_wait_for_close(struct sock *sk, long timeout)
    {
    	DEFINE_WAIT(wait);
    
    	do {
    		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
    		if (list_empty(&sctp_sk(sk)->ep->asocs))
    			break;
    		sctp_release_sock(sk);
    		timeout = schedule_timeout(timeout);
    		sctp_lock_sock(sk);
    	} while (!signal_pending(current) && timeout);
    
    	finish_wait(sk->sk_sleep, &wait);
    }
    
    
    static void sctp_sock_rfree_frag(struct sk_buff *skb)
    {
    	struct sk_buff *frag;
    
    	if (!skb->data_len)
    		goto done;
    
    	/* Don't forget the fragments. */
    	for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next)
    		sctp_sock_rfree_frag(frag);
    
    done:
    	sctp_sock_rfree(skb);
    }
    
    static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
    {
    	struct sk_buff *frag;