Skip to content
Snippets Groups Projects
socket.c 175 KiB
Newer Older
  • Learn to ignore specific revisions
  • 	while (walk_size < addrs_size) {
    
    		sa_addr = (union sctp_addr *)addr_buf;
    		af = sctp_get_af_specific(sa_addr->sa.sa_family);
    
    		port = ntohs(sa_addr->v4.sin_port);
    
    
    		/* If the address family is not supported or if this address
    		 * causes the address buffer to overflow return EINVAL.
    		 */
    		if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
    			err = -EINVAL;
    			goto out_free;
    		}
    
    
    		err = sctp_verify_addr(sk, sa_addr, af->sockaddr_len);
    
    		if (err)
    			goto out_free;
    
    
    		/* Make sure the destination port is correctly set
    		 * in all addresses.
    		 */
    		if (asoc && asoc->peer.port && asoc->peer.port != port)
    			goto out_free;
    
    
    		memcpy(&to, sa_addr, af->sockaddr_len);
    
    		/* Check if there already is a matching association on the
    		 * endpoint (other than the one created here).
    		 */
    
    		asoc2 = sctp_endpoint_lookup_assoc(ep, sa_addr, &transport);
    
    		if (asoc2 && asoc2 != asoc) {
    			if (asoc2->state >= SCTP_STATE_ESTABLISHED)
    				err = -EISCONN;
    			else
    				err = -EALREADY;
    			goto out_free;
    		}
    
    		/* If we could not find a matching association on the endpoint,
    		 * make sure that there is no peeled-off association matching
    		 * the peer address even on another socket.
    		 */
    
    		if (sctp_endpoint_is_peeled_off(ep, sa_addr)) {
    
    			err = -EADDRNOTAVAIL;
    			goto out_free;
    		}
    
    		if (!asoc) {
    			/* If a bind() or sctp_bindx() is not called prior to
    			 * an sctp_connectx() call, the system picks an
    			 * ephemeral port and will choose an address set
    			 * equivalent to binding with a wildcard address.
    			 */
    			if (!ep->base.bind_addr.port) {
    				if (sctp_autobind(sk)) {
    					err = -EAGAIN;
    					goto out_free;
    				}
    
    				 * If an unprivileged user inherits a 1-many
    				 * style socket with open associations on a
    				 * privileged port, it MAY be permitted to
    				 * accept new associations, but it SHOULD NOT
    
    				 * be permitted to open new associations.
    				 */
    				if (ep->base.bind_addr.port < PROT_SOCK &&
    				    !capable(CAP_NET_BIND_SERVICE)) {
    					err = -EACCES;
    					goto out_free;
    				}
    
    			scope = sctp_scope(sa_addr);
    
    			asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
    			if (!asoc) {
    				err = -ENOMEM;
    				goto out_free;
    			}
    		}
    
    		/* Prime the peer's transport structures.  */
    
    		transport = sctp_assoc_add_peer(asoc, sa_addr, GFP_KERNEL,
    
    						SCTP_UNKNOWN);
    		if (!transport) {
    			err = -ENOMEM;
    			goto out_free;
    		}
    
    		addrcnt++;
    		addr_buf += af->sockaddr_len;
    		walk_size += af->sockaddr_len;
    	}
    
    	err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
    	if (err < 0) {
    		goto out_free;
    	}
    
    	err = sctp_primitive_ASSOCIATE(asoc, NULL);
    	if (err < 0) {
    		goto out_free;
    	}
    
    	/* Initialize sk's dport and daddr for getpeername() */
    	inet_sk(sk)->dport = htons(asoc->peer.port);
    	af = sctp_get_af_specific(to.sa.sa_family);
    	af->to_sk_daddr(&to, sk);
    
    	/* in-kernel sockets don't generally have a file allocated to them
    	 * if all they do is call sock_create_kern().
    	 */
    	if (sk->sk_socket->file)
    		f_flags = sk->sk_socket->file->f_flags;
    
    	timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK);
    
    
    	err = sctp_wait_for_connect(asoc, &timeo);
    
    	/* Don't free association on exit. */
    	asoc = NULL;
    
    out_free:
    
    	SCTP_DEBUG_PRINTK("About to exit __sctp_connect() free asoc: %p"
    
    			  " kaddrs: %p err: %d\n",
    			  asoc, kaddrs, err);
    
    	if (asoc)
    		sctp_association_free(asoc);
    	return err;
    }
    
    /* Helper for tunneling sctp_connectx() requests through sctp_setsockopt()
     *
     * API 8.9
     * int sctp_connectx(int sd, struct sockaddr *addrs, int addrcnt);
     *
     * If sd is an IPv4 socket, the addresses passed must be IPv4 addresses.
     * If the sd is an IPv6 socket, the addresses passed can either be IPv4
     * or IPv6 addresses.
     *
     * A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see
     * Section 3.1.2 for this usage.
     *
     * addrs is a pointer to an array of one or more socket addresses. Each
     * address is contained in its appropriate structure (i.e. struct
     * sockaddr_in or struct sockaddr_in6) the family of the address type
     * must be used to distinguish the address length (note that this
     * representation is termed a "packed array" of addresses). The caller
     * specifies the number of addresses in the array with addrcnt.
     *
     * On success, sctp_connectx() returns 0. On failure, sctp_connectx() returns
     * -1, and sets errno to the appropriate error code.
     *
     * For SCTP, the port given in each socket address must be the same, or
     * sctp_connectx() will fail, setting errno to EINVAL.
     *
     * An application can use sctp_connectx to initiate an association with
     * an endpoint that is multi-homed.  Much like sctp_bindx() this call
     * allows a caller to specify multiple addresses at which a peer can be
     * reached.  The way the SCTP stack uses the list of addresses to set up
     * the association is implementation dependent.  This function only
     * specifies that the stack will try to make use of all the addresses in
     * the list when needed.
     *
     * Note that the list of addresses passed in is only used for setting up
     * the association.  It does not necessarily equal the set of addresses
     * the peer uses for the resulting association.  If the caller wants to
     * find out the set of peer addresses, it must use sctp_getpaddrs() to
     * retrieve them after the association has been set up.
     *
     * Basically does nothing but copy the addresses from user to kernel
     * land and invoke __sctp_connect(). This is used for tunneling
     * the sctp_connectx() request through sctp_setsockopt() from userspace.
     *
     * We don't use copy_from_user() for optimization: we first do the
     * sanity checks (buffer size -fast- and access check-healthy
     * pointer); if all of those succeed, then we can alloc the memory
     * (expensive operation) needed to copy the data to kernel. Then we do
     * the copying without checking the user space area
     * (__copy_from_user()).
     *
     * On exit there is no need to do sockfd_put(), sys_setsockopt() does
     * it.
     *
     * sk        The sk of the socket
     * addrs     The pointer to the addresses in user land
     * addrssize Size of the addrs buffer
     *
     * Returns 0 if ok, <0 errno code on error.
     */
    SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
    				      struct sockaddr __user *addrs,
    				      int addrs_size)
    {
    	struct sockaddr *kbuf;
    	int err;

    	SCTP_DEBUG_PRINTK("%s - sk %p addrs %p addrs_size %d\n",
    			  __FUNCTION__, sk, addrs, addrs_size);

    	/* Cheap sanity checks first: a positive size and a readable
    	 * user range, before paying for the allocation.
    	 */
    	if (unlikely(addrs_size <= 0))
    		return -EINVAL;
    	if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
    		return -EFAULT;

    	/* Kernel-side copy of the packed address array. */
    	kbuf = kmalloc(addrs_size, GFP_KERNEL);
    	if (unlikely(!kbuf))
    		return -ENOMEM;

    	/* The range was already validated by access_ok() above, hence
    	 * the unchecked __copy_from_user() variant.
    	 */
    	if (__copy_from_user(kbuf, addrs, addrs_size)) {
    		err = -EFAULT;
    		goto out;
    	}

    	err = __sctp_connect(sk, kbuf, addrs_size);

    out:
    	/* The buffer is released here on both the success and error paths. */
    	kfree(kbuf);
    	return err;
    }
    
    
    /* API 3.1.4 close() - UDP Style Syntax
     * Applications use close() to perform graceful shutdown (as described in
     * Section 10.1 of [SCTP]) on ALL the associations currently represented
     * by a UDP-style socket.
     *
     * The syntax is
     *
     *   ret = close(int sd);
     *
     *   sd      - the socket descriptor of the associations to be closed.
     *
     * To gracefully shutdown a specific association represented by the
     * UDP-style socket, an application should use the sendmsg() call,
     * passing no user data, but including the appropriate flag in the
     * ancillary data (see Section xxxx).
     *
     * If sd in the close() call is a branched-off socket representing only
     * one association, the shutdown is performed on that association only.
     *
     * 4.1.6 close() - TCP Style Syntax
     *
     * Applications use close() to gracefully close down an association.
     *
     * The syntax is:
     *
     *    int close(int sd);
     *
     *      sd      - the socket descriptor of the association to be closed.
     *
     * After an application calls close() on a socket descriptor, no further
     * socket operations will succeed on that descriptor.
     *
     * API 7.1.4 SO_LINGER
     *
     * An application using the TCP-style socket can use this option to
     * perform the SCTP ABORT primitive.  The linger option structure is:
     *
     *  struct  linger {
     *     int     l_onoff;                // option on/off
     *     int     l_linger;               // linger time
     * };
     *
     * To enable the option, set l_onoff to 1.  If the l_linger value is set
     * to 0, calling close() is the same as the ABORT primitive.  If the
     * value is set to a negative value, the setsockopt() call will return
     * an error.  If the value is set to a positive value linger_time, the
     * close() can be blocked for at most linger_time ms.  If the graceful
     * shutdown phase does not finish during this period, close() will
     * return but the graceful shutdown phase continues in the system.
     */
    SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
    {
    	struct sctp_endpoint *ep;
    	struct sctp_association *asoc;
    	struct list_head *pos, *temp;
    
    	SCTP_DEBUG_PRINTK("sctp_close(sk: 0x%p, timeout:%ld)\n", sk, timeout);
    
    	sctp_lock_sock(sk);
    	sk->sk_shutdown = SHUTDOWN_MASK;
    
    	ep = sctp_sk(sk)->ep;
    
    
    	/* Walk all associations on an endpoint.  */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	list_for_each_safe(pos, temp, &ep->asocs) {
    		asoc = list_entry(pos, struct sctp_association, asocs);
    
    		if (sctp_style(sk, TCP)) {
    			/* A closed association can still be in the list if
    			 * it belongs to a TCP-style listening socket that is
    			 * not yet accepted. If so, free it. If not, send an
    			 * ABORT or SHUTDOWN based on the linger options.
    			 */
    			if (sctp_state(asoc, CLOSED)) {
    				sctp_unhash_established(asoc);
    				sctp_association_free(asoc);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
    			struct sctp_chunk *chunk;
    
    			chunk = sctp_make_abort_user(asoc, NULL, 0);
    			if (chunk)
    				sctp_primitive_ABORT(asoc, chunk);
    		} else
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			sctp_primitive_SHUTDOWN(asoc, NULL);
    	}
    
    	/* Clean up any skbs sitting on the receive queue.  */
    	sctp_queue_purge_ulpevents(&sk->sk_receive_queue);
    	sctp_queue_purge_ulpevents(&sctp_sk(sk)->pd_lobby);
    
    	/* On a TCP-style socket, block for at most linger_time if set. */
    	if (sctp_style(sk, TCP) && timeout)
    		sctp_wait_for_close(sk, timeout);
    
    	/* This will run the backlog queue.  */
    	sctp_release_sock(sk);
    
    	/* Supposedly, no process has access to the socket, but
    	 * the net layers still may.
    	 */
    	sctp_local_bh_disable();
    	sctp_bh_lock_sock(sk);
    
    	/* Hold the sock, since sk_common_release() will put sock_put()
    	 * and we have just a little more cleanup.
    	 */
    	sock_hold(sk);
    	sk_common_release(sk);
    
    	sctp_bh_unlock_sock(sk);
    	sctp_local_bh_enable();
    
    	sock_put(sk);
    
    	SCTP_DBG_OBJCNT_DEC(sock);
    }
    
    /* Post-process an error code on the send path.
     *
     * Only -EPIPE is treated specially: a non-zero result from sock_error()
     * replaces it.  If the resulting code is still -EPIPE and the caller did
     * not pass MSG_NOSIGNAL in flags, SIGPIPE is sent to the current task.
     * Every other value of err is returned unchanged.
     */
    static int sctp_error(struct sock *sk, int flags, int err)
    {
    	int pending;

    	if (err != -EPIPE)
    		return err;

    	/* Prefer whatever sock_error() reports over the generic -EPIPE. */
    	pending = sock_error(sk);
    	if (pending)
    		err = pending;

    	if (err == -EPIPE && !(flags & MSG_NOSIGNAL))
    		send_sig(SIGPIPE, current, 0);

    	return err;
    }
    
    /* API 3.1.3 sendmsg() - UDP Style Syntax
     *
     * An application uses sendmsg() and recvmsg() calls to transmit data to
     * and receive data from its peer.
     *
     *  ssize_t sendmsg(int socket, const struct msghdr *message,
     *                  int flags);
     *
     *  socket  - the socket descriptor of the endpoint.
     *  message - pointer to the msghdr structure which contains a single
     *            user message and possibly some ancillary data.
     *
     *            See Section 5 for complete description of the data
     *            structures.
     *
     *  flags   - flags sent or received with the user message, see Section
     *            5 for complete description of the flags.
     *
     * Note:  This function could use a rewrite especially when explicit
     * connect support comes in.
     */
    /* BUG:  We do not implement the equivalent of sk_stream_wait_memory(). */
    
    SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
    
    /* Send one user message on an SCTP socket.
     *
     * iocb     - not referenced by this function.
     * sk       - the socket to send on.
     * msg      - message header: optional destination in msg_name, the
     *            payload iovec, and optional SCTP ancillary data
     *            (SNDRCVINFO / INIT) parsed by sctp_msghdr_parse().
     * msg_len  - payload length in bytes.
     *
     * Looks up the association by msg_name or by the SNDRCVINFO assoc id,
     * creating (and auto-binding for) a new association when msg_name is
     * given and none matches, then hands the message to the lower layers
     * chunk by chunk.
     *
     * Returns msg_len on success (0 for a processed SCTP_EOF/SCTP_ABORT
     * request), or a negative errno.  Except for a failed msg_name check,
     * errors are passed through sctp_error().
     */
    SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
    			     struct msghdr *msg, size_t msg_len)
    {
    	struct sctp_sock *sp;
    	struct sctp_endpoint *ep;
    	struct sctp_association *new_asoc=NULL, *asoc=NULL;
    	struct sctp_transport *transport, *chunk_tp;
    	struct sctp_chunk *chunk;
    	union sctp_addr to;
    	struct sockaddr *msg_name = NULL;
    	struct sctp_sndrcvinfo default_sinfo = { 0 };
    	struct sctp_sndrcvinfo *sinfo;
    	struct sctp_initmsg *sinit;
    	sctp_assoc_t associd = 0;
    	sctp_cmsgs_t cmsgs = { NULL };
    	int err;
    	sctp_scope_t scope;
    	long timeo;
    	__u16 sinfo_flags = 0;
    	struct sctp_datamsg *datamsg;
    	struct list_head *pos;
    	int msg_flags = msg->msg_flags;

    	SCTP_DEBUG_PRINTK("sctp_sendmsg(sk: %p, msg: %p, msg_len: %zu)\n",
    			  sk, msg, msg_len);

    	err = 0;
    	sp = sctp_sk(sk);
    	ep = sp->ep;

    	SCTP_DEBUG_PRINTK("Using endpoint: %p.\n", ep);

    	/* We cannot send a message over a TCP-style listening socket. */
    	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING)) {
    		err = -EPIPE;
    		goto out_nounlock;
    	}

    	/* Parse out the SCTP CMSGs.  */
    	err = sctp_msghdr_parse(msg, &cmsgs);

    	if (err) {
    		SCTP_DEBUG_PRINTK("msghdr parse err = %x\n", err);
    		goto out_nounlock;
    	}

    	/* Fetch the destination address for this packet.  This
    	 * address only selects the association--it is not necessarily
    	 * the address we will send to.
    	 * For a peeled-off socket, msg_name is ignored.
    	 */
    	if (!sctp_style(sk, UDP_HIGH_BANDWIDTH) && msg->msg_name) {
    		int msg_namelen = msg->msg_namelen;

    		err = sctp_verify_addr(sk, (union sctp_addr *)msg->msg_name,
    				       msg_namelen);
    		/* The socket lock has not been taken yet, so a plain
    		 * return is safe here.
    		 */
    		if (err)
    			return err;

    		/* Never copy more than the local address union can hold. */
    		if (msg_namelen > sizeof(to))
    			msg_namelen = sizeof(to);
    		memcpy(&to, msg->msg_name, msg_namelen);
    		msg_name = msg->msg_name;
    	}

    	sinfo = cmsgs.info;
    	sinit = cmsgs.init;

    	/* Did the user specify SNDRCVINFO?  */
    	if (sinfo) {
    		sinfo_flags = sinfo->sinfo_flags;
    		associd = sinfo->sinfo_assoc_id;
    	}

    	SCTP_DEBUG_PRINTK("msg_len: %zu, sinfo_flags: 0x%x\n",
    			  msg_len, sinfo_flags);

    	/* SCTP_EOF or SCTP_ABORT cannot be set on a TCP-style socket. */
    	if (sctp_style(sk, TCP) && (sinfo_flags & (SCTP_EOF | SCTP_ABORT))) {
    		err = -EINVAL;
    		goto out_nounlock;
    	}

    	/* If SCTP_EOF is set, no data can be sent. Disallow sending zero
    	 * length messages when SCTP_EOF|SCTP_ABORT is not set.
    	 * If SCTP_ABORT is set, the message length could be non zero with
    	 * the msg_iov set to the user abort reason.
    	 */
    	if (((sinfo_flags & SCTP_EOF) && (msg_len > 0)) ||
    	    (!(sinfo_flags & (SCTP_EOF|SCTP_ABORT)) && (msg_len == 0))) {
    		err = -EINVAL;
    		goto out_nounlock;
    	}

    	/* If SCTP_ADDR_OVER is set, there must be an address
    	 * specified in msg_name.
    	 */
    	if ((sinfo_flags & SCTP_ADDR_OVER) && (!msg->msg_name)) {
    		err = -EINVAL;
    		goto out_nounlock;
    	}

    	transport = NULL;

    	SCTP_DEBUG_PRINTK("About to look up association.\n");

    	sctp_lock_sock(sk);

    	/* If a msg_name has been specified, assume this is to be used.  */
    	if (msg_name) {
    		/* Look for a matching association on the endpoint. */
    		asoc = sctp_endpoint_lookup_assoc(ep, &to, &transport);
    		if (!asoc) {
    			/* If we could not find a matching association on the
    			 * endpoint, make sure that it is not a TCP-style
    			 * socket that already has an association or there is
    			 * no peeled-off association on another socket.
    			 */
    			if ((sctp_style(sk, TCP) &&
    			     sctp_sstate(sk, ESTABLISHED)) ||
    			    sctp_endpoint_is_peeled_off(ep, &to)) {
    				err = -EADDRNOTAVAIL;
    				goto out_unlock;
    			}
    		}
    	} else {
    		/* No address given: the association must already exist
    		 * and is selected by the SNDRCVINFO association id.
    		 */
    		asoc = sctp_id2assoc(sk, associd);
    		if (!asoc) {
    			err = -EPIPE;
    			goto out_unlock;
    		}
    	}

    	if (asoc) {
    		SCTP_DEBUG_PRINTK("Just looked up association: %p.\n", asoc);

    		/* We cannot send a message on a TCP-style SCTP_SS_ESTABLISHED
    		 * socket that has an association in CLOSED state. This can
    		 * happen when an accepted socket has an association that is
    		 * already CLOSED.
    		 */
    		if (sctp_state(asoc, CLOSED) && sctp_style(sk, TCP)) {
    			err = -EPIPE;
    			goto out_unlock;
    		}

    		if (sinfo_flags & SCTP_EOF) {
    			SCTP_DEBUG_PRINTK("Shutting down association: %p\n",
    					  asoc);
    			sctp_primitive_SHUTDOWN(asoc, NULL);
    			err = 0;
    			goto out_unlock;
    		}

    		if (sinfo_flags & SCTP_ABORT) {
    			/* Distinct name so the function-scope 'chunk' used
    			 * by the send loop below is not shadowed.
    			 */
    			struct sctp_chunk *abort_chunk;

    			abort_chunk = sctp_make_abort_user(asoc, msg, msg_len);
    			if (!abort_chunk) {
    				err = -ENOMEM;
    				goto out_unlock;
    			}

    			SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
    			sctp_primitive_ABORT(asoc, abort_chunk);
    			err = 0;
    			goto out_unlock;
    		}
    	}

    	/* Do we need to create the association?  */
    	if (!asoc) {
    		SCTP_DEBUG_PRINTK("There is no association yet.\n");

    		if (sinfo_flags & (SCTP_EOF | SCTP_ABORT)) {
    			err = -EINVAL;
    			goto out_unlock;
    		}

    		/* Check for invalid stream against the stream counts,
    		 * either the default or the user specified stream counts.
    		 */
    		if (sinfo) {
    			if (!sinit || (sinit && !sinit->sinit_num_ostreams)) {
    				/* Check against the defaults. */
    				if (sinfo->sinfo_stream >=
    				    sp->initmsg.sinit_num_ostreams) {
    					err = -EINVAL;
    					goto out_unlock;
    				}
    			} else {
    				/* Check against the requested.  */
    				if (sinfo->sinfo_stream >=
    				    sinit->sinit_num_ostreams) {
    					err = -EINVAL;
    					goto out_unlock;
    				}
    			}
    		}

    		/*
    		 * API 3.1.2 bind() - UDP Style Syntax
    		 * If a bind() or sctp_bindx() is not called prior to a
    		 * sendmsg() call that initiates a new association, the
    		 * system picks an ephemeral port and will choose an address
    		 * set equivalent to binding with a wildcard address.
    		 */
    		if (!ep->base.bind_addr.port) {
    			if (sctp_autobind(sk)) {
    				err = -EAGAIN;
    				goto out_unlock;
    			}
    		} else {
    			/*
    			 * If an unprivileged user inherits a one-to-many
    			 * style socket with open associations on a privileged
    			 * port, it MAY be permitted to accept new associations,
    			 * but it SHOULD NOT be permitted to open new
    			 * associations.
    			 */
    			if (ep->base.bind_addr.port < PROT_SOCK &&
    			    !capable(CAP_NET_BIND_SERVICE)) {
    				err = -EACCES;
    				goto out_unlock;
    			}
    		}

    		scope = sctp_scope(&to);
    		new_asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL);
    		if (!new_asoc) {
    			err = -ENOMEM;
    			goto out_unlock;
    		}
    		asoc = new_asoc;

    		/* If the SCTP_INIT ancillary data is specified, set all
    		 * the association init values accordingly.
    		 */
    		if (sinit) {
    			if (sinit->sinit_num_ostreams) {
    				asoc->c.sinit_num_ostreams =
    					sinit->sinit_num_ostreams;
    			}
    			if (sinit->sinit_max_instreams) {
    				asoc->c.sinit_max_instreams =
    					sinit->sinit_max_instreams;
    			}
    			if (sinit->sinit_max_attempts) {
    				asoc->max_init_attempts
    					= sinit->sinit_max_attempts;
    			}
    			if (sinit->sinit_max_init_timeo) {
    				asoc->max_init_timeo =
    				 msecs_to_jiffies(sinit->sinit_max_init_timeo);
    			}
    		}

    		/* Prime the peer's transport structures.  */
    		transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, SCTP_UNKNOWN);
    		if (!transport) {
    			err = -ENOMEM;
    			goto out_free;
    		}
    		err = sctp_assoc_set_bind_addr_from_ep(asoc, GFP_KERNEL);
    		if (err < 0) {
    			err = -ENOMEM;
    			goto out_free;
    		}
    	}

    	/* ASSERT: we have a valid association at this point.  */
    	SCTP_DEBUG_PRINTK("We have a valid association.\n");

    	if (!sinfo) {
    		/* If the user didn't specify SNDRCVINFO, make up one with
    		 * some defaults.
    		 */
    		default_sinfo.sinfo_stream = asoc->default_stream;
    		default_sinfo.sinfo_flags = asoc->default_flags;
    		default_sinfo.sinfo_ppid = asoc->default_ppid;
    		default_sinfo.sinfo_context = asoc->default_context;
    		default_sinfo.sinfo_timetolive = asoc->default_timetolive;
    		default_sinfo.sinfo_assoc_id = sctp_assoc2id(asoc);
    		sinfo = &default_sinfo;
    	}

    	/* API 7.1.7, the sndbuf size per association bounds the
    	 * maximum size of data that can be sent in a single send call.
    	 */
    	if (msg_len > sk->sk_sndbuf) {
    		err = -EMSGSIZE;
    		goto out_free;
    	}

    	if (asoc->pmtu_pending)
    		sctp_assoc_pending_pmtu(asoc);

    	/* If fragmentation is disabled and the message length exceeds the
    	 * association fragmentation point, return EMSGSIZE.  The I-D
    	 * does not specify what this error is, but this looks like
    	 * a great fit.
    	 */
    	if (sctp_sk(sk)->disable_fragments && (msg_len > asoc->frag_point)) {
    		err = -EMSGSIZE;
    		goto out_free;
    	}

    	if (sinfo) {
    		/* Check for invalid stream. */
    		if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) {
    			err = -EINVAL;
    			goto out_free;
    		}
    	}

    	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
    	if (!sctp_wspace(asoc)) {
    		err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
    		if (err)
    			goto out_free;
    	}

    	/* If an address is passed with the sendto/sendmsg call, it is used
    	 * to override the primary destination address in the TCP model, or
    	 * when SCTP_ADDR_OVER flag is set in the UDP model.
    	 */
    	if ((sctp_style(sk, TCP) && msg_name) ||
    	    (sinfo_flags & SCTP_ADDR_OVER)) {
    		chunk_tp = sctp_assoc_lookup_paddr(asoc, &to);
    		if (!chunk_tp) {
    			err = -EINVAL;
    			goto out_free;
    		}
    	} else
    		chunk_tp = NULL;

    	/* Auto-connect, if we aren't connected already. */
    	if (sctp_state(asoc, CLOSED)) {
    		err = sctp_primitive_ASSOCIATE(asoc, NULL);
    		if (err < 0)
    			goto out_free;
    		SCTP_DEBUG_PRINTK("We associated primitively.\n");
    	}

    	/* Break the message into multiple chunks of maximum size. */
    	datamsg = sctp_datamsg_from_user(asoc, sinfo, msg, msg_len);
    	if (!datamsg) {
    		err = -ENOMEM;
    		goto out_free;
    	}

    	/* Now send the (possibly) fragmented message. */
    	list_for_each(pos, &datamsg->chunks) {
    		chunk = list_entry(pos, struct sctp_chunk, frag_list);
    		sctp_datamsg_track(chunk);

    		/* Do accounting for the write space.  */
    		sctp_set_owner_w(chunk);

    		chunk->transport = chunk_tp;

    		/* Send it to the lower layers.  Note:  all chunks
    		 * must either fail or succeed.   The lower layer
    		 * works that way today.  Keep it that way or this
    		 * breaks.
    		 */
    		err = sctp_primitive_SEND(asoc, chunk);
    		/* Did the lower layer accept the chunk? */
    		if (err)
    			sctp_chunk_free(chunk);
    		SCTP_DEBUG_PRINTK("We sent primitively.\n");
    	}

    	sctp_datamsg_free(datamsg);
    	if (err)
    		goto out_free;
    	else
    		err = msg_len;

    	/* If we are already past ASSOCIATE, the lower
    	 * layers are responsible for association cleanup.
    	 */
    	goto out_unlock;

    out_free:
    	/* Only an association created by this call is torn down here;
    	 * a pre-existing one is left untouched.
    	 */
    	if (new_asoc)
    		sctp_association_free(asoc);
    out_unlock:
    	sctp_release_sock(sk);

    out_nounlock:
    	return sctp_error(sk, msg_flags, err);
    }
    
    /* Extended skb_pull(): strip 'len' bytes from the front of an skb even
     * when the data continues into the skbs chained on its frag_list.
     *
     * The linear head is consumed first; whatever is left over is pulled
     * recursively from each fragment in turn, and skb->len / skb->data_len
     * of the parent are reduced by the amount taken from each fragment.
     *
     * Returns 0 when all 'len' bytes were removed, otherwise the number of
     * bytes that could not be removed.
     */
    static int sctp_skb_pull(struct sk_buff *skb, int len)
    {
    	struct sk_buff *frag;
    	int head_len = skb_headlen(skb);
    	int remaining;
    	int taken;

    	/* Fast path: the request fits entirely in the linear head. */
    	if (len <= head_len) {
    		__skb_pull(skb, len);
    		return 0;
    	}

    	/* Drain the whole head, then continue into the fragments. */
    	len -= head_len;
    	__skb_pull(skb, head_len);

    	frag = skb_shinfo(skb)->frag_list;
    	while (frag) {
    		remaining = sctp_skb_pull(frag, len);
    		taken = len - remaining;

    		skb->len -= taken;
    		skb->data_len -= taken;

    		if (!remaining)
    			return 0;

    		len = remaining;
    		frag = frag->next;
    	}

    	return len;
    }
    
    /* API 3.1.3  recvmsg() - UDP Style Syntax
     *
     *  ssize_t recvmsg(int socket, struct msghdr *message,
     *                    int flags);
     *
     *  socket  - the socket descriptor of the endpoint.
     *  message - pointer to the msghdr structure which contains a single
     *            user message and possibly some ancillary data.
     *
     *            See Section 5 for complete description of the data
     *            structures.
     *
     *  flags   - flags sent or received with the user message, see Section
     *            5 for complete description of the flags.
     */
    static struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
    
    /* Receive one message (user data or notification) from an SCTP socket.
     *
     * iocb     - not referenced by this function.
     * sk       - the socket to read from; locked for the duration of the call.
     * msg      - destination msghdr; msg_iov receives the payload, msg_name
     *            and msg_flags are filled in.
     * len      - size of the caller's buffer in bytes.
     * noblock  - passed through to sctp_skb_recv_datagram().
     * flags    - receive flags; MSG_PEEK is handled explicitly here.
     * addr_len - passed to the per-family msgname callbacks.
     *
     * Returns the number of bytes copied on success, -ENOTCONN for a
     * TCP-style socket that is not ESTABLISHED, or the error reported by
     * sctp_skb_recv_datagram() / skb_copy_datagram_iovec().
     */
    SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
    			     struct msghdr *msg, size_t len, int noblock,
    			     int flags, int *addr_len)
    {
    	struct sctp_ulpevent *event = NULL;
    	struct sctp_sock *sp = sctp_sk(sk);
    	struct sk_buff *skb;
    	int copied;
    	int err = 0;
    	int skb_len;

    	SCTP_DEBUG_PRINTK("sctp_recvmsg(%s: %p, %s: %p, %s: %zd, %s: %d, %s: "
    			  "0x%x, %s: %p)\n", "sk", sk, "msghdr", msg,
    			  "len", len, "knoblauch", noblock,
    			  "flags", flags, "addr_len", addr_len);

    	sctp_lock_sock(sk);

    	/* A TCP-style socket can only receive once it is ESTABLISHED. */
    	if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED)) {
    		err = -ENOTCONN;
    		goto out;
    	}

    	/* On failure err has been filled in through the pointer argument. */
    	skb = sctp_skb_recv_datagram(sk, flags, noblock, &err);
    	if (!skb)
    		goto out;

    	/* Get the total length of the skb including any skb's in the
    	 * frag_list.
    	 */
    	skb_len = skb->len;

    	/* Copy at most as much as the caller's buffer can hold. */
    	copied = skb_len;
    	if (copied > len)
    		copied = len;

    	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

    	/* Resolve the event before the error check: the out_free path
    	 * below needs it to release the message.
    	 */
    	event = sctp_skb2event(skb);

    	if (err)
    		goto out_free;

    	sock_recv_timestamp(msg, sk, skb);
    	if (sctp_ulpevent_is_notification(event)) {
    		msg->msg_flags |= MSG_NOTIFICATION;
    		sp->pf->event_msgname(event, msg->msg_name, addr_len);
    	} else {
    		sp->pf->skb_msgname(skb, msg->msg_name, addr_len);
    	}

    	/* Check if we allow SCTP_SNDRCVINFO. */
    	if (sp->subscribe.sctp_data_io_event)
    		sctp_ulpevent_read_sndrcvinfo(event, msg);
    #if 0
    	/* FIXME: we should be calling IP/IPv6 layers.  */
    	if (sk->sk_protinfo.af_inet.cmsg_flags)
    		ip_cmsg_recv(msg, skb);
    #endif

    	/* From here on the return value is the byte count copied. */
    	err = copied;

    	/* If skb's length exceeds the user's buffer, update the skb and
    	 * push it back to the receive_queue so that the next call to
    	 * recvmsg() will return the remaining data. Don't set MSG_EOR.
    	 */
    	if (skb_len > copied) {
    		msg->msg_flags &= ~MSG_EOR;
    		/* A peek must not consume data: skip the pull and requeue. */
    		if (flags & MSG_PEEK)
    			goto out_free;
    		sctp_skb_pull(skb, copied);
    		skb_queue_head(&sk->sk_receive_queue, skb);

    		/* When only partial message is copied to the user, increase
    		 * rwnd by that amount. If all the data in the skb is read,
    		 * rwnd is updated when the event is freed.
    		 */
    		sctp_assoc_rwnd_increase(event->asoc, copied);
    		/* The skb is back on the receive queue, so skip out_free. */
    		goto out;
    	} else if ((event->msg_flags & MSG_NOTIFICATION) ||
    		   (event->msg_flags & MSG_EOR))
    		msg->msg_flags |= MSG_EOR;
    	else
    		msg->msg_flags &= ~MSG_EOR;

    out_free:
    	if (flags & MSG_PEEK) {
    		/* Release the skb reference acquired after peeking the skb in
    		 * sctp_skb_recv_datagram().
    		 */
    		kfree_skb(skb);
    	} else {
    		/* Free the event which includes releasing the reference to
    		 * the owner of the skb, freeing the skb and updating the
    		 * rwnd.
    		 */
    		sctp_ulpevent_free(event);
    	}
    out:
    	sctp_release_sock(sk);
    	return err;
    }
    
    /* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
     *
     * This option is an on/off flag.  If enabled no SCTP message
     * fragmentation will be performed.  Instead if a message being sent
     * exceeds the current PMTU size, the message will NOT be sent and
     * instead an error will be indicated to the user.
     */
    static int sctp_setsockopt_disable_fragments(struct sock *sk,
    					    char __user *optval, int optlen)
    {
    	int flag;

    	/* The option value must be at least one int wide. */
    	if (optlen < sizeof(int))
    		return -EINVAL;

    	/* Only the first int of the user buffer is read. */
    	if (get_user(flag, (int __user *)optval))
    		return -EFAULT;

    	/* Normalize any non-zero value to 1. */
    	sctp_sk(sk)->disable_fragments = !!flag;

    	return 0;
    }
    
    /* Replace the socket's event subscription with the structure supplied
     * by the caller.  optlen must match sizeof(struct sctp_event_subscribe)
     * exactly, otherwise -EINVAL is returned; a failed user copy yields
     * -EFAULT.
     */
    static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
    					int optlen)
    {
    	struct sctp_sock *sp = sctp_sk(sk);

    	if (optlen != sizeof(struct sctp_event_subscribe))
    		return -EINVAL;

    	return copy_from_user(&sp->subscribe, optval, optlen) ? -EFAULT : 0;
    }
    
    /* 7.1.8 Automatic Close of associations (SCTP_AUTOCLOSE)
     *
     * This socket option is applicable to the UDP-style socket only.  When
     * set it will cause associations that are idle for more than the
     * specified number of seconds to automatically close.  An association
     * being idle is defined as an association that has NOT sent or received
     * user data.  The special value of '0' indicates that no automatic
     * close of any associations should be performed.  The option expects an
     * integer defining the number of seconds of idle time before an
     * association is closed.
     */
    static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
    					    int optlen)
    {
    	/* Applicable to UDP-style socket only */
    	if (sctp_style(sk, TCP))
    		return -EOPNOTSUPP;

    	/* The option value is exactly one int. */
    	if (optlen != sizeof(int))
    		return -EINVAL;

    	/* Store the user value directly into the socket's autoclose field. */
    	if (copy_from_user(&sctp_sk(sk)->autoclose, optval, optlen))
    		return -EFAULT;

    	return 0;
    }