Skip to content
Snippets Groups Projects
socket.c 163 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    		bp = &asoc->base.bind_addr;
    		addr_lock = &asoc->base.addr_lock;
    	}
    
    	to = getaddrs.addrs;
    
    	sctp_read_lock(addr_lock);
    
    	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
    	 * addresses from the global local address list.
    	 */
    	if (sctp_list_single_entry(&bp->address_list)) {
    		addr = list_entry(bp->address_list.next,
    				  struct sctp_sockaddr_entry, list);
    
    		if (sctp_is_any(&addr->a)) {
    
    			cnt = sctp_copy_laddrs_to_user_old(sk, bp->port,
    							   getaddrs.addr_num,
    							   to);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			if (cnt < 0) {
    				err = cnt;
    				goto unlock;
    			}
    			goto copy_getaddrs;		
    		}
    	}
    
    	list_for_each(pos, &bp->address_list) {
    		addr = list_entry(pos, struct sctp_sockaddr_entry, list);
    
    		memcpy(&temp, &addr->a, sizeof(temp));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
    		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
    		if (copy_to_user(to, &temp, addrlen)) {
    			err = -EFAULT;
    			goto unlock;
    		}
    		to += addrlen;
    		cnt ++;
    		if (cnt >= getaddrs.addr_num) break;
    	}
    
    copy_getaddrs:
    	getaddrs.addr_num = cnt;
    
    	if (copy_to_user(optval, &getaddrs, sizeof(struct sctp_getaddrs_old)))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		err = -EFAULT;
    
    unlock:
    	sctp_read_unlock(addr_lock);
    	return err;
    }
    
    
    /*
     * getsockopt(SCTP_GET_LOCAL_ADDRS) handler.
     *
     * Reads a struct sctp_getaddrs header from optval.  When assoc_id is 0
     * the endpoint's bind address list is used, otherwise the list of the
     * association identified by assoc_id.  The addresses are written to the
     * user buffer starting at the 'addrs' member; on success the number of
     * addresses is stored in the user's addr_num field and the number of
     * bytes used (header offset plus addresses) in *optlen.
     *
     * Returns 0 on success, -EINVAL when len does not exceed the header size
     * or the association is unknown, -ENOMEM when the buffer is too small for
     * an address, -EFAULT when user memory cannot be accessed, or the
     * negative value returned by sctp_copy_laddrs_to_user().
     */
    static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
    				       char __user *optval, int __user *optlen)
    {
    	struct sctp_bind_addr *bp;
    	struct sctp_association *asoc;
    	struct list_head *pos;
    	int cnt = 0;
    	struct sctp_getaddrs getaddrs;
    	struct sctp_sockaddr_entry *addr;
    	void __user *to;
    	union sctp_addr temp;
    	struct sctp_sock *sp = sctp_sk(sk);
    	int addrlen;
    	rwlock_t *addr_lock;
    	int err = 0;
    	size_t space_left;
    	int bytes_copied = 0;

    	/* A negative len would otherwise be converted to a huge unsigned
    	 * value by the comparison against sizeof() and slip through.
    	 */
    	if (len < 0 || (size_t)len <= sizeof(struct sctp_getaddrs))
    		return -EINVAL;

    	if (copy_from_user(&getaddrs, optval, sizeof(struct sctp_getaddrs)))
    		return -EFAULT;

    	/*
    	 *  For UDP-style sockets, id specifies the association to query.
    	 *  If the id field is set to the value '0' then the locally bound
    	 *  addresses are returned without regard to any particular
    	 *  association.
    	 */
    	if (0 == getaddrs.assoc_id) {
    		bp = &sctp_sk(sk)->ep->base.bind_addr;
    		addr_lock = &sctp_sk(sk)->ep->base.addr_lock;
    	} else {
    		asoc = sctp_id2assoc(sk, getaddrs.assoc_id);
    		if (!asoc)
    			return -EINVAL;
    		bp = &asoc->base.bind_addr;
    		addr_lock = &asoc->base.addr_lock;
    	}

    	/* Addresses start at the 'addrs' member, so the room available for
    	 * them is everything past that offset.  Subtracting the header size
    	 * as well could wrap the unsigned result for small buffers.
    	 */
    	to = optval + offsetof(struct sctp_getaddrs,addrs);
    	space_left = len - offsetof(struct sctp_getaddrs,addrs);

    	/* NOTE(review): copy_to_user() below still runs with addr_lock
    	 * held; confirm that faulting is acceptable in that context.
    	 */
    	sctp_read_lock(addr_lock);

    	/* If the endpoint is bound to 0.0.0.0 or ::0, get the valid
    	 * addresses from the global local address list.
    	 */
    	if (sctp_list_single_entry(&bp->address_list)) {
    		addr = list_entry(bp->address_list.next,
    				  struct sctp_sockaddr_entry, list);

    		if (sctp_is_any(&addr->a)) {
    			cnt = sctp_copy_laddrs_to_user(sk, bp->port,
    						       &to, space_left);
    			if (cnt < 0) {
    				err = cnt;
    				goto unlock;
    			}
    			goto copy_getaddrs;
    		}
    	}

    	list_for_each(pos, &bp->address_list) {
    		addr = list_entry(pos, struct sctp_sockaddr_entry, list);

    		memcpy(&temp, &addr->a, sizeof(temp));

    		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
    		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
    		if (space_left < addrlen) {
    			/* Must leave through 'unlock' so the lock is dropped. */
    			err = -ENOMEM; /*fixme: right error?*/
    			goto unlock;
    		}
    		if (copy_to_user(to, &temp, addrlen)) {
    			err = -EFAULT;
    			goto unlock;
    		}
    		to += addrlen;
    		cnt ++;
    		space_left -= addrlen;
    	}

    copy_getaddrs:
    	bytes_copied = ((char __user *)to) - optval;

    unlock:
    	sctp_read_unlock(addr_lock);
    	if (err)
    		return err;

    	/* The results are plain locals by now, so they are reported to user
    	 * space only after the address lock has been released.
    	 */
    	if (put_user(cnt, &((struct sctp_getaddrs __user *)optval)->addr_num))
    		return -EFAULT;
    	if (put_user(bytes_copied, optlen))
    		return -EFAULT;

    	return 0;
    }
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /* 7.1.10 Set Primary Address (SCTP_PRIMARY_ADDR)
     *
     * Requests that the local SCTP stack use the enclosed peer address as
     * the association primary.  The enclosed address must be one of the
     * association peer's addresses.
     */
    static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
    					char __user *optval, int __user *optlen)
    {
    	struct sctp_prim prim;
    	struct sctp_association *asoc;
    	struct sctp_sock *sp = sctp_sk(sk);
    
    	if (len != sizeof(struct sctp_prim))
    		return -EINVAL;
    
    	if (copy_from_user(&prim, optval, sizeof(struct sctp_prim)))
    		return -EFAULT;
    
    	asoc = sctp_id2assoc(sk, prim.ssp_assoc_id);
    	if (!asoc)
    		return -EINVAL;
    
    	if (!asoc->peer.primary_path)
    		return -ENOTCONN;
    	
    
    	memcpy(&prim.ssp_addr, &asoc->peer.primary_path->ipaddr,
    		asoc->peer.primary_path->af_specific->sockaddr_len);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp,
    			(union sctp_addr *)&prim.ssp_addr);
    
    	if (copy_to_user(optval, &prim, sizeof(struct sctp_prim)))
    		return -EFAULT;
    
    	return 0;
    }
    
    /*
     * 7.1.11  Set Adaption Layer Indicator (SCTP_ADAPTION_LAYER)
     *
     * Requests that the local endpoint set the specified Adaption Layer
     * Indication parameter for all future INIT and INIT-ACK exchanges.
     *
     * For getsockopt, the socket's current adaption_ind value is returned
     * in the ssb_adaption_ind member of a struct sctp_setadaption.
     *
     * Returns 0 on success, -EINVAL when len is not
     * sizeof(struct sctp_setadaption), and -EFAULT when the result cannot
     * be copied to optval.
     */
    static int sctp_getsockopt_adaption_layer(struct sock *sk, int len,
    				  char __user *optval, int __user *optlen)
    {
    	struct sctp_setadaption adaption;

    	if (len != sizeof(struct sctp_setadaption))
    		return -EINVAL;

    	adaption.ssb_adaption_ind = sctp_sk(sk)->adaption_ind;
    	if (copy_to_user(optval, &adaption, len))
    		return -EFAULT;

    	return 0;
    }
    
    /*
     *
     * 7.1.14 Set default send parameters (SCTP_DEFAULT_SEND_PARAM)
     *
     *   Applications that wish to use the sendto() system call may wish to
     *   specify a default set of parameters that would normally be supplied
     *   through the inclusion of ancillary data.  This socket option allows
     *   such an application to set the default sctp_sndrcvinfo structure.
     *
     *   The caller passes in a sctp_sndrcvinfo structure; sinfo_assoc_id
     *   selects the association to query.
     *
     *   For getsockopt, the stream, flags, ppid, context and timetolive
     *   defaults are read from the association when one is found, and from
     *   the socket otherwise, and the structure is copied back to optval.
     */
    static int sctp_getsockopt_default_send_param(struct sock *sk,
    					int len, char __user *optval,
    					int __user *optlen)
    {
    	struct sctp_sock *sp = sctp_sk(sk);
    	struct sctp_association *asoc;
    	struct sctp_sndrcvinfo info;

    	if (len != sizeof(info))
    		return -EINVAL;
    	if (copy_from_user(&info, optval, sizeof(info)))
    		return -EFAULT;

    	asoc = sctp_id2assoc(sk, info.sinfo_assoc_id);
    	if (!asoc) {
    		/* A non-zero id that matches nothing is an error on a
    		 * UDP-style socket.
    		 */
    		if (info.sinfo_assoc_id && sctp_style(sk, UDP))
    			return -EINVAL;

    		/* Fall back to the socket-wide defaults. */
    		info.sinfo_stream = sp->default_stream;
    		info.sinfo_flags = sp->default_flags;
    		info.sinfo_ppid = sp->default_ppid;
    		info.sinfo_context = sp->default_context;
    		info.sinfo_timetolive = sp->default_timetolive;
    	} else {
    		/* Defaults recorded on the association itself. */
    		info.sinfo_stream = asoc->default_stream;
    		info.sinfo_flags = asoc->default_flags;
    		info.sinfo_ppid = asoc->default_ppid;
    		info.sinfo_context = asoc->default_context;
    		info.sinfo_timetolive = asoc->default_timetolive;
    	}

    	return copy_to_user(optval, &info, sizeof(info)) ? -EFAULT : 0;
    }
    
    /*
     *
     * 7.1.5 SCTP_NODELAY
     *
     * Turn on/off any Nagle-like algorithm.  This means that packets are
     * generally sent as soon as possible and no unnecessary delays are
     * introduced, at the cost of more packets in the network.  Expects an
     * integer boolean flag.
     *
     * For getsockopt, an int that is 1 when the socket's nodelay field
     * equals 1 and 0 otherwise is written to optval, and sizeof(int) is
     * written to *optlen.
     */

    static int sctp_getsockopt_nodelay(struct sock *sk, int len,
    				   char __user *optval, int __user *optlen)
    {
    	int nodelay_on = (sctp_sk(sk)->nodelay == 1);

    	if (len < sizeof(int))
    		return -EINVAL;

    	/* Only an int is ever reported, whatever size was offered. */
    	len = sizeof(int);
    	if (put_user(len, optlen) || copy_to_user(optval, &nodelay_on, len))
    		return -EFAULT;

    	return 0;
    }
    
    /*
     *
     * 7.1.1 SCTP_RTOINFO
     *
     * The protocol parameters used to initialize and bound retransmission
     * timeout (RTO) are tunable. sctp_rtoinfo structure is used to access
     * and modify these parameters.
     * All parameters are time values, in milliseconds.  A value of 0, when
     * modifying the parameters, indicates that the current value should not
     * be changed.
     *
     * For getsockopt, the initial/max/min values come from the association
     * selected by srto_assoc_id (converted with jiffies_to_msecs()), or from
     * the socket's stored rtoinfo when no association is found.
     */
    static int sctp_getsockopt_rtoinfo(struct sock *sk, int len,
    				char __user *optval,
    				int __user *optlen)
    {
    	struct sctp_association *asoc;
    	struct sctp_rtoinfo rtoinfo;

    	if (len != sizeof(rtoinfo))
    		return -EINVAL;

    	if (copy_from_user(&rtoinfo, optval, sizeof(rtoinfo)))
    		return -EFAULT;

    	asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);

    	if (!asoc) {
    		struct sctp_sock *sp;

    		/* An unknown non-zero id is rejected on UDP-style sockets. */
    		if (rtoinfo.srto_assoc_id && sctp_style(sk, UDP))
    			return -EINVAL;

    		/* Values corresponding to the endpoint. */
    		sp = sctp_sk(sk);
    		rtoinfo.srto_initial = sp->rtoinfo.srto_initial;
    		rtoinfo.srto_max = sp->rtoinfo.srto_max;
    		rtoinfo.srto_min = sp->rtoinfo.srto_min;
    	} else {
    		/* Values corresponding to the specific association. */
    		rtoinfo.srto_initial = jiffies_to_msecs(asoc->rto_initial);
    		rtoinfo.srto_max = jiffies_to_msecs(asoc->rto_max);
    		rtoinfo.srto_min = jiffies_to_msecs(asoc->rto_min);
    	}

    	if (put_user(len, optlen) || copy_to_user(optval, &rtoinfo, len))
    		return -EFAULT;

    	return 0;
    }
    
    /*
     *
     * 7.1.2 SCTP_ASSOCINFO
     *
     * This option is used to tune the maximum retransmission attempts
     * of the association.
     * Returns an error if the new association retransmission value is
     * greater than the sum of the retransmission value of the peer.
     * See [SCTP] for more information.
     *
     * For getsockopt, the parameters are read from the association selected
     * by sasoc_assoc_id, or from the socket's stored assocparams when no
     * association is found.  Returns 0 on success, -EINVAL for a wrong len
     * or an unknown non-zero id on a UDP-style socket, and -EFAULT when
     * user memory cannot be accessed.
     */
    static int sctp_getsockopt_associnfo(struct sock *sk, int len,
    				     char __user *optval,
    				     int __user *optlen)
    {

    	struct sctp_assocparams assocparams;
    	struct sctp_association *asoc;
    	struct list_head *pos;
    	int cnt = 0;

    	if (len != sizeof (struct sctp_assocparams))
    		return -EINVAL;

    	if (copy_from_user(&assocparams, optval,
    			sizeof (struct sctp_assocparams)))
    		return -EFAULT;

    	asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);

    	if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP))
    		return -EINVAL;

    	/* Values corresponding to the specific association */
    	if (asoc) {
    		assocparams.sasoc_asocmaxrxt = asoc->max_retrans;
    		assocparams.sasoc_peer_rwnd = asoc->peer.rwnd;
    		assocparams.sasoc_local_rwnd = asoc->a_rwnd;
    		/* cookie_life is kept as seconds + microseconds; report
    		 * it as a single millisecond value.
    		 */
    		assocparams.sasoc_cookie_life = (asoc->cookie_life.tv_sec
    						* 1000) +
    						(asoc->cookie_life.tv_usec
    						/ 1000);

    		/* One peer destination per entry on the transport list. */
    		list_for_each(pos, &asoc->peer.transport_addr_list) {
    			cnt ++;
    		}

    		assocparams.sasoc_number_peer_destinations = cnt;
    	} else {
    		/* Values corresponding to the endpoint */
    		struct sctp_sock *sp = sctp_sk(sk);

    		assocparams.sasoc_asocmaxrxt = sp->assocparams.sasoc_asocmaxrxt;
    		assocparams.sasoc_peer_rwnd = sp->assocparams.sasoc_peer_rwnd;
    		assocparams.sasoc_local_rwnd = sp->assocparams.sasoc_local_rwnd;
    		assocparams.sasoc_cookie_life =
    					sp->assocparams.sasoc_cookie_life;
    		assocparams.sasoc_number_peer_destinations =
    					sp->assocparams.
    					sasoc_number_peer_destinations;
    	}

    	if (put_user(len, optlen))
    		return -EFAULT;

    	if (copy_to_user(optval, &assocparams, len))
    		return -EFAULT;

    	return 0;
    }
    
    /*
     * 7.1.16 Set/clear IPv4 mapped addresses (SCTP_I_WANT_MAPPED_V4_ADDR)
     *
     * This socket option is a boolean flag which turns on or off mapped V4
     * addresses.  If this option is turned on and the socket is type
     * PF_INET6, then IPv4 addresses will be mapped to V6 representation.
     * If this option is turned off, then no mapping will be done of V4
     * addresses and a user will receive both PF_INET6 and PF_INET type
     * addresses on the socket.
     *
     * For getsockopt, the socket's v4mapped value is written to optval as
     * an int and sizeof(int) is written to *optlen.
     */
    static int sctp_getsockopt_mappedv4(struct sock *sk, int len,
    				    char __user *optval, int __user *optlen)
    {
    	int mapped;

    	if (len < sizeof(int))
    		return -EINVAL;

    	/* The reported length is always that of a single int. */
    	len = sizeof(int);
    	mapped = sctp_sk(sk)->v4mapped;

    	if (put_user(len, optlen) || copy_to_user(optval, &mapped, len))
    		return -EFAULT;

    	return 0;
    }
    
    /*
     * 7.1.17 Set the maximum fragmentation size (SCTP_MAXSEG)
     *
     * This socket option specifies the maximum size to put in any outgoing
     * SCTP chunk.  If a message is larger than this size it will be
     * fragmented by SCTP into the specified size.  Note that the underlying
     * SCTP implementation may fragment into smaller sized chunks when the
     * PMTU of the underlying association is smaller than the value set by
     * the user.
     *
     * For getsockopt, the socket's user_frag value is written to optval as
     * an int and sizeof(int) is written to *optlen.
     */
    static int sctp_getsockopt_maxseg(struct sock *sk, int len,
    				  char __user *optval, int __user *optlen)
    {
    	int frag;

    	if (len < sizeof(int))
    		return -EINVAL;

    	/* The reported length is always that of a single int. */
    	len = sizeof(int);
    	frag = sctp_sk(sk)->user_frag;

    	if (put_user(len, optlen) || copy_to_user(optval, &frag, len))
    		return -EFAULT;

    	return 0;
    }
    
    /*
     * Top-level getsockopt() entry point for SCTP sockets.
     *
     * Requests for a level other than SOL_SCTP are handed to the address
     * family's getsockopt hook without taking the socket lock.  For SOL_SCTP
     * the caller-supplied length is read from *optlen, the socket is locked,
     * and the request is dispatched on optname to the matching
     * sctp_getsockopt_*() helper.
     *
     * Returns the helper's result, -EFAULT when *optlen cannot be read,
     * -EINVAL for a negative length, and -ENOPROTOOPT for an unknown optname.
     */
    SCTP_STATIC int sctp_getsockopt(struct sock *sk, int level, int optname,
    				char __user *optval, int __user *optlen)
    {
    	int retval = 0;
    	int len;

    	SCTP_DEBUG_PRINTK("sctp_getsockopt(sk: %p... optname: %d)\n",
    			  sk, optname);

    	/* I can hardly begin to describe how wrong this is.  This is
    	 * so broken as to be worse than useless.  The API draft
    	 * REALLY is NOT helpful here...  I am not convinced that the
    	 * semantics of getsockopt() with a level OTHER THAN SOL_SCTP
    	 * are at all well-founded.
    	 */
    	if (level != SOL_SCTP) {
    		struct sctp_af *af = sctp_sk(sk)->pf->af;

    		retval = af->getsockopt(sk, level, optname, optval, optlen);
    		return retval;
    	}

    	if (get_user(len, optlen))
    		return -EFAULT;

    	/* Several helpers compare len against sizeof(), which converts a
    	 * negative value to a huge unsigned one; reject it up front.
    	 */
    	if (len < 0)
    		return -EINVAL;

    	sctp_lock_sock(sk);

    	switch (optname) {
    	case SCTP_STATUS:
    		retval = sctp_getsockopt_sctp_status(sk, len, optval, optlen);
    		break;
    	case SCTP_DISABLE_FRAGMENTS:
    		retval = sctp_getsockopt_disable_fragments(sk, len, optval,
    							   optlen);
    		break;
    	case SCTP_EVENTS:
    		retval = sctp_getsockopt_events(sk, len, optval, optlen);
    		break;
    	case SCTP_AUTOCLOSE:
    		retval = sctp_getsockopt_autoclose(sk, len, optval, optlen);
    		break;
    	case SCTP_SOCKOPT_PEELOFF:
    		retval = sctp_getsockopt_peeloff(sk, len, optval, optlen);
    		break;
    	case SCTP_PEER_ADDR_PARAMS:
    		retval = sctp_getsockopt_peer_addr_params(sk, len, optval,
    							  optlen);
    		break;
    	case SCTP_DELAYED_ACK_TIME:
    		retval = sctp_getsockopt_delayed_ack_time(sk, len, optval,
    							  optlen);
    		break;
    	case SCTP_INITMSG:
    		retval = sctp_getsockopt_initmsg(sk, len, optval, optlen);
    		break;
    	case SCTP_GET_PEER_ADDRS_NUM_OLD:
    		retval = sctp_getsockopt_peer_addrs_num_old(sk, len, optval,
    							    optlen);
    		break;
    	case SCTP_GET_LOCAL_ADDRS_NUM_OLD:
    		retval = sctp_getsockopt_local_addrs_num_old(sk, len, optval,
    							     optlen);
    		break;
    	case SCTP_GET_PEER_ADDRS_OLD:
    		retval = sctp_getsockopt_peer_addrs_old(sk, len, optval,
    							optlen);
    		break;
    	case SCTP_GET_LOCAL_ADDRS_OLD:
    		retval = sctp_getsockopt_local_addrs_old(sk, len, optval,
    							 optlen);
    		break;
    	case SCTP_GET_PEER_ADDRS:
    		retval = sctp_getsockopt_peer_addrs(sk, len, optval,
    						    optlen);
    		break;
    	case SCTP_GET_LOCAL_ADDRS:
    		retval = sctp_getsockopt_local_addrs(sk, len, optval,
    						     optlen);
    		break;
    	case SCTP_DEFAULT_SEND_PARAM:
    		retval = sctp_getsockopt_default_send_param(sk, len,
    							    optval, optlen);
    		break;
    	case SCTP_PRIMARY_ADDR:
    		retval = sctp_getsockopt_primary_addr(sk, len, optval, optlen);
    		break;
    	case SCTP_NODELAY:
    		retval = sctp_getsockopt_nodelay(sk, len, optval, optlen);
    		break;
    	case SCTP_RTOINFO:
    		retval = sctp_getsockopt_rtoinfo(sk, len, optval, optlen);
    		break;
    	case SCTP_ASSOCINFO:
    		retval = sctp_getsockopt_associnfo(sk, len, optval, optlen);
    		break;
    	case SCTP_I_WANT_MAPPED_V4_ADDR:
    		retval = sctp_getsockopt_mappedv4(sk, len, optval, optlen);
    		break;
    	case SCTP_MAXSEG:
    		retval = sctp_getsockopt_maxseg(sk, len, optval, optlen);
    		break;
    	case SCTP_GET_PEER_ADDR_INFO:
    		retval = sctp_getsockopt_peer_addr_info(sk, len, optval,
    							optlen);
    		break;
    	case SCTP_ADAPTION_LAYER:
    		retval = sctp_getsockopt_adaption_layer(sk, len, optval,
    							optlen);
    		break;
    	default:
    		retval = -ENOPROTOOPT;
    		break;
    	}

    	sctp_release_sock(sk);
    	return retval;
    }
    
    /* Placeholder with an empty body: calling it performs no work. */
    static void sctp_hash(struct sock *sk)
    {
    	/* STUB */
    }
    
    /* Placeholder with an empty body: calling it performs no work. */
    static void sctp_unhash(struct sock *sk)
    {
    	/* STUB */
    }
    
    /* Check if port is acceptable.  Possibly find first available port.
     *
     * The port hash table (contained in the 'global' SCTP protocol storage
     * returned by struct sctp_protocol *sctp_get_protocol()). The hash
     * table is an array of 4096 lists (sctp_bind_hashbucket). Each
     * list (the list number is the port number hashed out, so as you
     * would expect from a hash function, all the ports in a given list have
     * such a number that hashes out to the same list number; you were
     * expecting that, right?); so each list has a set of ports, with a
     * link to the socket (struct sock) that uses it, the port number and
     * a fastreuse flag (FIXME: NPI ipg).
     */
    static struct sctp_bind_bucket *sctp_bucket_create(
    	struct sctp_bind_hashbucket *head, unsigned short snum);
    
    /*
     * Bind 'sk' to the port carried in addr->v4.sin_port (network byte
     * order), or to a free port from the local port range when that port
     * is 0.
     *
     * Returns 0 on success.  Returns 1 when the local port range is
     * exhausted or a new bucket cannot be allocated.  When another socket
     * bound to the same port has a conflicting address, that socket's
     * pointer is returned cast to long.
     *
     * Bottom halves are disabled for the duration of the call, and the
     * selected hash bucket's lock is held from lookup until the result
     * has been recorded.
     */
    static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
    {
    	struct sctp_bind_hashbucket *head; /* hash list */
    	struct sctp_bind_bucket *pp; /* hash list port iterator */
    	unsigned short snum;
    	/* Wide enough to carry the conflicting socket pointer returned
    	 * below without truncating it.
    	 */
    	long ret;

    	snum = ntohs(addr->v4.sin_port);

    	SCTP_DEBUG_PRINTK("sctp_get_port() begins, snum=%d\n", snum);
    	sctp_local_bh_disable();

    	if (snum == 0) {
    		/* Search for an available port.
    		 *
    		 * 'sctp_port_rover' was the last port assigned, so
    		 * we start to search from 'sctp_port_rover +
    		 * 1'. What we do is first check if port 'rover' is
    		 * already in the hash table; if not, we use that; if
    		 * it is, we try next.
    		 */
    		int low = sysctl_local_port_range[0];
    		int high = sysctl_local_port_range[1];
    		int remaining = (high - low) + 1;
    		int rover;
    		int index;

    		sctp_spin_lock(&sctp_port_alloc_lock);
    		rover = sctp_port_rover;
    		do {
    			rover++;
    			if ((rover < low) || (rover > high))
    				rover = low;
    			index = sctp_phashfn(rover);
    			head = &sctp_port_hashtable[index];
    			sctp_spin_lock(&head->lock);
    			for (pp = head->chain; pp; pp = pp->next)
    				if (pp->port == rover)
    					goto next;
    			/* Free port found: leave the loop with
    			 * head->lock still held.
    			 */
    			break;
    		next:
    			sctp_spin_unlock(&head->lock);
    		} while (--remaining > 0);
    		sctp_port_rover = rover;
    		sctp_spin_unlock(&sctp_port_alloc_lock);

    		/* Exhausted local port range during search? */
    		ret = 1;
    		if (remaining <= 0)
    			goto fail;

    		/* OK, here is the one we will use.  HEAD (the port
    		 * hash table list entry) is non-NULL and we hold its
    		 * mutex.
    		 */
    		snum = rover;
    	} else {
    		/* We are given a specific port number; we verify
    		 * that it is not being used. If it is used, we will
    		 * exhaust the search in the hash list corresponding
    		 * to the port number (snum) - we detect that with the
    		 * port iterator, pp being NULL.
    		 */
    		head = &sctp_port_hashtable[sctp_phashfn(snum)];
    		sctp_spin_lock(&head->lock);
    		for (pp = head->chain; pp; pp = pp->next) {
    			if (pp->port == snum)
    				goto pp_found;
    		}
    	}
    	pp = NULL;
    	goto pp_not_found;
    pp_found:
    	if (!hlist_empty(&pp->owner)) {
    		/* We had a port hash table hit - there is an
    		 * available port (pp != NULL) and it is being
    		 * used by other socket (pp->owner not empty); that other
    		 * socket is going to be sk2.
    		 */
    		int reuse = sk->sk_reuse;
    		struct sock *sk2;
    		struct hlist_node *node;

    		SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
    		if (pp->fastreuse && sk->sk_reuse)
    			goto success;

    		/* Run through the list of sockets bound to the port
    		 * (pp->port) [via the pointers bind_next and
    		 * bind_pprev in the struct sock *sk2 (pp->sk)]. On each one,
    		 * we get the endpoint they describe and run through
    		 * the endpoint's list of IP (v4 or v6) addresses,
    		 * comparing each of the addresses with the address of
    		 * the socket sk. If we find a match, then that means
    		 * that this port/socket (sk) combination are already
    		 * in an endpoint.
    		 */
    		sk_for_each_bound(sk2, node, &pp->owner) {
    			struct sctp_endpoint *ep2;
    			ep2 = sctp_sk(sk2)->ep;

    			if (reuse && sk2->sk_reuse)
    				continue;

    			if (sctp_bind_addr_match(&ep2->base.bind_addr, addr,
    						 sctp_sk(sk))) {
    				ret = (long)sk2;
    				goto fail_unlock;
    			}
    		}
    		SCTP_DEBUG_PRINTK("sctp_get_port(): Found a match\n");
    	}
    pp_not_found:
    	/* If there was a hash table miss, create a new port.  */
    	ret = 1;
    	if (!pp && !(pp = sctp_bucket_create(head, snum)))
    		goto fail_unlock;

    	/* In either case (hit or miss), make sure fastreuse is 1 only
    	 * if sk->sk_reuse is too (that is, if the caller requested
    	 * SO_REUSEADDR on this socket -sk-).
    	 */
    	if (hlist_empty(&pp->owner))
    		pp->fastreuse = sk->sk_reuse ? 1 : 0;
    	else if (pp->fastreuse && !sk->sk_reuse)
    		pp->fastreuse = 0;

    	/* We are set, so fill up all the data in the hash table
    	 * entry, tie the socket list information with the rest of the
    	 * sockets FIXME: Blurry, NPI (ipg).
    	 */
    success:
    	inet_sk(sk)->num = snum;
    	if (!sctp_sk(sk)->bind_hash) {
    		sk_add_bind_node(sk, &pp->owner);
    		sctp_sk(sk)->bind_hash = pp;
    	}
    	ret = 0;

    fail_unlock:
    	sctp_spin_unlock(&head->lock);

    fail:
    	sctp_local_bh_enable();
    	return ret;
    }
    
    /* Assign port 'snum' (host byte order) to the socket; a value of 0
     * requests an ephemeral port.  Returns 0 when the port was assigned
     * and 1 for any failure reported by sctp_get_port_local().
     */
    static int sctp_get_port(struct sock *sk, unsigned short snum)
    {
    	struct sctp_af *af = sctp_sk(sk)->pf->af;
    	union sctp_addr addr;

    	/* Build a throw-away address from the socket and stamp the
    	 * requested port into it.
    	 */
    	af->from_sk(&addr, sk);
    	addr.v4.sin_port = htons(snum);

    	/* Any non-zero result, including a conflicting socket pointer,
    	 * collapses to 1 for the caller.
    	 */
    	if (sctp_get_port_local(sk, &addr))
    		return 1;

    	return 0;
    }
    
    /*
     * 3.1.3 listen() - UDP Style Syntax
     *
     *   By default, new associations are not accepted for UDP style sockets.
     *   An application uses listen() to mark a socket as being able to
     *   accept new associations.
     *
     * A backlog of 0 turns listening off.  Returns 0 on success, -EINVAL
     * when the socket is not UDP style, and -EAGAIN when an unbound socket
     * cannot be auto-bound.
     */
    SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
    {
    	struct sctp_sock *sp = sctp_sk(sk);
    	struct sctp_endpoint *ep = sp->ep;

    	/* Only UDP style sockets that are not peeled off are allowed to
    	 * listen().
    	 */
    	if (!sctp_style(sk, UDP))
    		return -EINVAL;

    	/* If backlog is zero, disable listening. */
    	if (!backlog) {
    		if (sctp_sstate(sk, CLOSED))
    			return 0;

    		sctp_unhash_endpoint(ep);
    		sk->sk_state = SCTP_SS_CLOSED;
    		/* Stop here: falling through would immediately put the
    		 * socket back into the LISTENING state below.
    		 */
    		return 0;
    	}

    	/* Return if we are already listening. */
    	if (sctp_sstate(sk, LISTENING))
    		return 0;

    	/*
    	 * If a bind() or sctp_bindx() is not called prior to a listen()
    	 * call that allows new associations to be accepted, the system
    	 * picks an ephemeral port and will choose an address set equivalent
    	 * to binding with a wildcard address.
    	 *
    	 * This is not currently spelled out in the SCTP sockets
    	 * extensions draft, but follows the practice as seen in TCP
    	 * sockets.
    	 */
    	if (!ep->base.bind_addr.port) {
    		if (sctp_autobind(sk))
    			return -EAGAIN;
    	}
    	sk->sk_state = SCTP_SS_LISTENING;
    	sctp_hash_endpoint(ep);
    	return 0;
    }
    
    /*
     * 4.1.3 listen() - TCP Style Syntax
     *
     *   Applications uses listen() to ready the SCTP endpoint for accepting
     *   inbound associations.
     *
     * A backlog of 0 turns listening off.  Returns 0 on success and -EAGAIN
     * when an unbound socket cannot be auto-bound.
     */
    SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
    {
    	struct sctp_sock *sp = sctp_sk(sk);
    	struct sctp_endpoint *ep = sp->ep;

    	/* If backlog is zero, disable listening. */
    	if (!backlog) {
    		if (sctp_sstate(sk, CLOSED))
    			return 0;

    		sctp_unhash_endpoint(ep);
    		sk->sk_state = SCTP_SS_CLOSED;
    		/* Stop here: falling through would immediately put the
    		 * socket back into the LISTENING state below.
    		 */
    		return 0;
    	}

    	/* Return if we are already listening. */
    	if (sctp_sstate(sk, LISTENING))
    		return 0;

    	/*
    	 * If a bind() or sctp_bindx() is not called prior to a listen()
    	 * call that allows new associations to be accepted, the system
    	 * picks an ephemeral port and will choose an address set equivalent
    	 * to binding with a wildcard address.
    	 *
    	 * This is not currently spelled out in the SCTP sockets
    	 * extensions draft, but follows the practice as seen in TCP
    	 * sockets.
    	 */
    	if (!ep->base.bind_addr.port) {
    		if (sctp_autobind(sk))
    			return -EAGAIN;
    	}
    	sk->sk_state = SCTP_SS_LISTENING;
    	sk->sk_max_ack_backlog = backlog;
    	sctp_hash_endpoint(ep);
    	return 0;
    }
    
    /*
     *  Move a socket to LISTENING state.
     */
    int sctp_inet_listen(struct socket *sock, int backlog)
    {
    	struct sock *sk = sock->sk;
    
    	struct crypto_hash *tfm = NULL;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	int err = -EINVAL;
    
    	if (unlikely(backlog < 0))
    		goto out;
    
    	sctp_lock_sock(sk);
    
    	if (sock->state != SS_UNCONNECTED)
    		goto out;
    
    	/* Allocate HMAC for generating cookie. */
    	if (sctp_hmac_alg) {
    
    		tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (!tfm) {
    			err = -ENOSYS;
    			goto out;
    		}
    	}
    
    	switch (sock->type) {
    	case SOCK_SEQPACKET:
    		err = sctp_seqpacket_listen(sk, backlog);
    		break;
    	case SOCK_STREAM:
    		err = sctp_stream_listen(sk, backlog);
    		break;
    	default:
    		break;
    	};
    	if (err)
    		goto cleanup;
    
    	/* Store away the transform reference. */
    	sctp_sk(sk)->hmac = tfm;
    out:
    	sctp_release_sock(sk);
    	return err;
    cleanup:
    
    	crypto_free_hash(tfm);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	goto out;
    }
    
    /*
     * This function is done by modeling the current datagram_poll() and the
     * tcp_poll().  Note that, based on these implementations, we don't
     * lock the socket in this function, even though it seems that,
     * ideally, locking or some other mechanisms can be used to ensure
     * the integrity of the counters (sndbuf and wmem_alloc) used
     * in this place.  We assume that we don't need locks either until proven
     * otherwise.
     *
     * Another thing to note is that we include the Async I/O support
     * here, again, by modeling the current TCP/UDP code.  We don't have
     * a good way to test with it yet.
     *
     * Returns the POLL* event mask that currently applies to the socket.
     */
    unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
    {
    	struct sock *sk = sock->sk;
    	struct sctp_sock *sp = sctp_sk(sk);
    	unsigned int mask;

    	poll_wait(file, sk->sk_sleep, wait);

    	/* A TCP-style listening socket becomes readable when the accept queue
    	 * is not empty.
    	 */
    	if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
    		return (!list_empty(&sp->ep->asocs)) ?
    			(POLLIN | POLLRDNORM) : 0;

    	mask = 0;

    	/* Is there any exceptional events?  */
    	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
    		mask |= POLLERR;
    	if (sk->sk_shutdown & RCV_SHUTDOWN)
    		mask |= POLLRDHUP;
    	if (sk->sk_shutdown == SHUTDOWN_MASK)
    		mask |= POLLHUP;

    	/* Is it readable?  Reconsider this code with TCP-style support.  */
    	if (!skb_queue_empty(&sk->sk_receive_queue) ||
    	    (sk->sk_shutdown & RCV_SHUTDOWN))
    		mask |= POLLIN | POLLRDNORM;

    	/* The association is either gone or not ready.  */
    	if (!sctp_style(sk, UDP) && sctp_sstate(sk, CLOSED))
    		return mask;

    	/* Is it writable?  */
    	if (sctp_writeable(sk)) {
    		mask |= POLLOUT | POLLWRNORM;
    	} else {
    		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
    		/*
    		 * Since the socket is not locked, the buffer
    		 * might be made available after the writeable check and
    		 * before the bit is set.  This could cause a lost I/O
    		 * signal.  tcp_poll() has a race breaker for this race
    		 * condition.  Based on their implementation, we put
    		 * in the following code to cover it as well.
    		 */
    		if (sctp_writeable(sk))
    			mask |= POLLOUT | POLLWRNORM;
    	}
    	return mask;
    }
    
    /********************************************************************
     * 2nd Level Abstractions
     ********************************************************************/
    
    static struct sctp_bind_bucket *sctp_bucket_create(
    	struct sctp_bind_hashbucket *head, unsigned short snum)
    {
    	struct sctp_bind_bucket *pp;
    
    
    	pp = kmem_cache_alloc(sctp_bucket_cachep, GFP_ATOMIC);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	SCTP_DBG_OBJCNT_INC(bind_bucket);
    	if (pp) {
    		pp->port = snum;
    		pp->fastreuse = 0;
    		INIT_HLIST_HEAD(&pp->owner);
    		if ((pp->next = head->chain) != NULL)
    			pp->next->pprev = &pp->next;
    		head->chain = pp;