Skip to content
Snippets Groups Projects
dev.c 97.8 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    		case SIOCSIFMAP:
    			if (dev->set_config) {
    				if (!netif_device_present(dev))
    					return -ENODEV;
    				return dev->set_config(dev, &ifr->ifr_map);
    			}
    			return -EOPNOTSUPP;
    
    		case SIOCADDMULTI:
    			if (!dev->set_multicast_list ||
    			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
    				return -EINVAL;
    			if (!netif_device_present(dev))
    				return -ENODEV;
    			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
    					  dev->addr_len, 1);
    
    		case SIOCDELMULTI:
    			if (!dev->set_multicast_list ||
    			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
    				return -EINVAL;
    			if (!netif_device_present(dev))
    				return -ENODEV;
    			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
    					     dev->addr_len, 1);
    
    		case SIOCGIFINDEX:
    			ifr->ifr_ifindex = dev->ifindex;
    			return 0;
    
    		case SIOCGIFTXQLEN:
    			ifr->ifr_qlen = dev->tx_queue_len;
    			return 0;
    
    		case SIOCSIFTXQLEN:
    			if (ifr->ifr_qlen < 0)
    				return -EINVAL;
    			dev->tx_queue_len = ifr->ifr_qlen;
    			return 0;
    
    		case SIOCSIFNAME:
    			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
    			return dev_change_name(dev, ifr->ifr_newname);
    
    		/*
    		 *	Unknown or private ioctl
    		 */
    
    		default:
    			if ((cmd >= SIOCDEVPRIVATE &&
    			    cmd <= SIOCDEVPRIVATE + 15) ||
    			    cmd == SIOCBONDENSLAVE ||
    			    cmd == SIOCBONDRELEASE ||
    			    cmd == SIOCBONDSETHWADDR ||
    			    cmd == SIOCBONDSLAVEINFOQUERY ||
    			    cmd == SIOCBONDINFOQUERY ||
    			    cmd == SIOCBONDCHANGEACTIVE ||
    			    cmd == SIOCGMIIPHY ||
    			    cmd == SIOCGMIIREG ||
    			    cmd == SIOCSMIIREG ||
    			    cmd == SIOCBRADDIF ||
    			    cmd == SIOCBRDELIF ||
    			    cmd == SIOCWANDEV) {
    				err = -EOPNOTSUPP;
    				if (dev->do_ioctl) {
    					if (netif_device_present(dev))
    						err = dev->do_ioctl(dev, ifr,
    								    cmd);
    					else
    						err = -ENODEV;
    				}
    			} else
    				err = -EINVAL;
    
    	}
    	return err;
    }
    
    /*
     *	This function handles all "interface"-type I/O control requests. The actual
     *	'doing' part of this is dev_ifsioc above.
     */
    
    /**
     *	dev_ioctl	-	network device ioctl
     *	@cmd: command to issue
     *	@arg: pointer to a struct ifreq in user space
     *
     *	Issue ioctl functions to devices. This is normally called by the
     *	user space syscall interfaces but can sometimes be useful for
     *	other purposes. The return value is the return from the syscall if
     *	positive or a negative errno code on error.
     */
    
    int dev_ioctl(unsigned int cmd, void __user *arg)
    {
    	struct ifreq ifr;
    	int ret;
    	char *colon;

    	/* One special case: SIOCGIFCONF takes ifconf argument
    	   and requires shared lock, because it sleeps writing
    	   to user space.
    	 */
    	if (cmd == SIOCGIFCONF) {
    		rtnl_lock();
    		ret = dev_ifconf((char __user *) arg);
    		rtnl_unlock();
    		return ret;
    	}

    	/* SIOCGIFNAME is handed the user pointer directly. */
    	if (cmd == SIOCGIFNAME)
    		return dev_ifname((struct ifreq __user *)arg);

    	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
    		return -EFAULT;

    	/* User space is not trusted to have terminated the name. */
    	ifr.ifr_name[IFNAMSIZ-1] = 0;

    	/*
    	 * Cut the name at the first ':' while the request is processed.
    	 * The branches that copy the request back (except the private
    	 * ioctl branch) put the ':' back first.
    	 */
    	colon = strchr(ifr.ifr_name, ':');
    	if (colon)
    		*colon = 0;

    	/*
    	 *	See which interface the caller is talking about.
    	 */

    	switch (cmd) {
    		/*
    		 *	These ioctl calls:
    		 *	- can be done by all.
    		 *	- atomic and do not require locking.
    		 *	- return a value
    		 */
    		case SIOCGIFFLAGS:
    		case SIOCGIFMETRIC:
    		case SIOCGIFMTU:
    		case SIOCGIFHWADDR:
    		case SIOCGIFSLAVE:
    		case SIOCGIFMAP:
    		case SIOCGIFINDEX:
    		case SIOCGIFTXQLEN:
    			dev_load(ifr.ifr_name);
    			read_lock(&dev_base_lock);
    			ret = dev_ifsioc(&ifr, cmd);
    			read_unlock(&dev_base_lock);
    			if (!ret) {
    				if (colon)
    					*colon = ':';
    				if (copy_to_user(arg, &ifr,
    						 sizeof(struct ifreq)))
    					ret = -EFAULT;
    			}
    			return ret;

    		case SIOCETHTOOL:
    			dev_load(ifr.ifr_name);
    			rtnl_lock();
    			ret = dev_ethtool(&ifr);
    			rtnl_unlock();
    			if (!ret) {
    				if (colon)
    					*colon = ':';
    				if (copy_to_user(arg, &ifr,
    						 sizeof(struct ifreq)))
    					ret = -EFAULT;
    			}
    			return ret;

    		/*
    		 *	These ioctl calls:
    		 *	- require superuser power.
    		 *	- require strict serialization.
    		 *	- return a value
    		 */
    		case SIOCGMIIPHY:
    		case SIOCGMIIREG:
    		case SIOCSIFNAME:
    			if (!capable(CAP_NET_ADMIN))
    				return -EPERM;
    			dev_load(ifr.ifr_name);
    			rtnl_lock();
    			ret = dev_ifsioc(&ifr, cmd);
    			rtnl_unlock();
    			if (!ret) {
    				if (colon)
    					*colon = ':';
    				if (copy_to_user(arg, &ifr,
    						 sizeof(struct ifreq)))
    					ret = -EFAULT;
    			}
    			return ret;

    		/*
    		 *	These ioctl calls:
    		 *	- require superuser power.
    		 *	- require strict serialization.
    		 *	- do not return a value
    		 */
    		case SIOCSIFFLAGS:
    		case SIOCSIFMETRIC:
    		case SIOCSIFMTU:
    		case SIOCSIFMAP:
    		case SIOCSIFHWADDR:
    		case SIOCSIFSLAVE:
    		case SIOCADDMULTI:
    		case SIOCDELMULTI:
    		case SIOCSIFHWBROADCAST:
    		case SIOCSIFTXQLEN:
    		case SIOCSMIIREG:
    		case SIOCBONDENSLAVE:
    		case SIOCBONDRELEASE:
    		case SIOCBONDSETHWADDR:
    		case SIOCBONDCHANGEACTIVE:
    		case SIOCBRADDIF:
    		case SIOCBRDELIF:
    			if (!capable(CAP_NET_ADMIN))
    				return -EPERM;
    			/* fall through */

    		/*
    		 *	The two bonding queries share the code below but
    		 *	skip the capability check above.
    		 */
    		case SIOCBONDSLAVEINFOQUERY:
    		case SIOCBONDINFOQUERY:
    			dev_load(ifr.ifr_name);
    			rtnl_lock();
    			ret = dev_ifsioc(&ifr, cmd);
    			rtnl_unlock();
    			return ret;

    		case SIOCGIFMEM:
    			/* Get the per device memory space. We can add this but
    			 * currently do not support it */
    		case SIOCSIFMEM:
    			/* Set the per device memory buffer space.
    			 * Not applicable in our case */
    		case SIOCSIFLINK:
    			return -EINVAL;

    		/*
    		 *	Unknown or private ioctl.
    		 */
    		default:
    			if (cmd == SIOCWANDEV ||
    			    (cmd >= SIOCDEVPRIVATE &&
    			     cmd <= SIOCDEVPRIVATE + 15)) {
    				dev_load(ifr.ifr_name);
    				rtnl_lock();
    				ret = dev_ifsioc(&ifr, cmd);
    				rtnl_unlock();
    				/* Note: the ':' is not restored on this path. */
    				if (!ret && copy_to_user(arg, &ifr,
    							 sizeof(struct ifreq)))
    					ret = -EFAULT;
    				return ret;
    			}

    			/* Take care of Wireless Extensions */
    			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
    				return wext_handle_ioctl(&ifr, cmd, arg);

    			return -EINVAL;
    	}
    }
    
    
    /**
     *	dev_new_index	-	allocate an ifindex
     *
     *	Returns a suitable unique value for a new device interface
     *	number.  The caller must hold the rtnl semaphore or the
     *	dev_base_lock to be sure it remains unique.
     */
    static int dev_new_index(void)
    {
    	/* Last index handed out; persists across calls. */
    	static int ifindex;

    	do {
    		ifindex++;
    		/* Indices stay strictly positive: restart at 1. */
    		if (ifindex <= 0)
    			ifindex = 1;
    	} while (__dev_get_by_index(ifindex));	/* skip indices in use */

    	return ifindex;
    }
    
    /*
     * Non-zero until net_dev_init() clears it.  register_netdevice() and
     * unregister_netdevice() BUG if they run while it is still set.
     */
    static int dev_boot_phase = 1;
    
    /*
     * Delayed registration/unregistration: devices queued by net_set_todo()
     * and consumed by netdev_run_todo().  net_todo_list_lock is taken around
     * every update of net_todo_list.
     */
    static DEFINE_SPINLOCK(net_todo_list_lock);
    static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
    
    
    static void net_set_todo(struct net_device *dev)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	spin_lock(&net_todo_list_lock);
    	list_add_tail(&dev->todo_list, &net_todo_list);
    	spin_unlock(&net_todo_list_lock);
    }
    
    /**
     *	register_netdevice	- register a network device
     *	@dev: device to register
     *
     *	Take a completed network device structure and add it to the kernel
     *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
     *	chain. 0 is returned on success. A negative errno code is returned
     *	on a failure to set up the device, or if the name is a duplicate.
     *
     *	Callers must hold the rtnl semaphore. You may want
     *	register_netdev() instead of this.
     *
     *	BUGS:
     *	The locking appears insufficient to guarantee two parallel registers
     *	will not get the same name.
     */
    
    int register_netdevice(struct net_device *dev)
    {
    	struct hlist_head *head;
    	struct hlist_node *p;
    	int ret;
    
    	BUG_ON(dev_boot_phase);
    	ASSERT_RTNL();
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* When net_device's are persistent, this will be fatal. */
    	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
    
    	spin_lock_init(&dev->queue_lock);
    
    Herbert Xu's avatar
    Herbert Xu committed
    	spin_lock_init(&dev->_xmit_lock);
    
    	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	dev->xmit_lock_owner = -1;
    	spin_lock_init(&dev->ingress_lock);
    
    	dev->iflink = -1;
    
    	/* Init, if this function is available */
    	if (dev->init) {
    		ret = dev->init(dev);
    		if (ret) {
    			if (ret > 0)
    				ret = -EIO;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (!dev_valid_name(dev->name)) {
    		ret = -EINVAL;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	dev->ifindex = dev_new_index();
    	if (dev->iflink == -1)
    		dev->iflink = dev->ifindex;
    
    	/* Check for existence of name */
    	head = dev_name_hash(dev->name);
    	hlist_for_each(p, head) {
    		struct net_device *d
    			= hlist_entry(p, struct net_device, name_hlist);
    		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
    			ret = -EEXIST;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	/* Fix illegal checksum combinations */
    	if ((dev->features & NETIF_F_HW_CSUM) &&
    	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
    		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
    		       dev->name);
    		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
    	}
    
    	if ((dev->features & NETIF_F_NO_CSUM) &&
    	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
    		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
    		       dev->name);
    		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
    	}
    
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* Fix illegal SG+CSUM combinations. */
    	if ((dev->features & NETIF_F_SG) &&
    
    	    !(dev->features & NETIF_F_ALL_CSUM)) {
    
    		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		       dev->name);
    		dev->features &= ~NETIF_F_SG;
    	}
    
    	/* TSO requires that SG is present as well. */
    	if ((dev->features & NETIF_F_TSO) &&
    	    !(dev->features & NETIF_F_SG)) {
    
    		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		       dev->name);
    		dev->features &= ~NETIF_F_TSO;
    	}
    
    	if (dev->features & NETIF_F_UFO) {
    		if (!(dev->features & NETIF_F_HW_CSUM)) {
    			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
    					"NETIF_F_HW_CSUM feature.\n",
    							dev->name);
    			dev->features &= ~NETIF_F_UFO;
    		}
    		if (!(dev->features & NETIF_F_SG)) {
    			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
    					"NETIF_F_SG feature.\n",
    					dev->name);
    			dev->features &= ~NETIF_F_UFO;
    		}
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/*
    	 *	nil rebuild_header routine,
    	 *	that should be never called and used as just bug trap.
    	 */
    
    	if (!dev->rebuild_header)
    		dev->rebuild_header = default_rebuild_header;
    
    
    	ret = netdev_register_sysfs(dev);
    	if (ret)
    
    	dev->reg_state = NETREG_REGISTERED;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/*
    	 *	Default initial state at registry is that the
    	 *	device is present.
    	 */
    
    	set_bit(__LINK_STATE_PRESENT, &dev->state);
    
    	dev_init_scheduler(dev);
    	write_lock_bh(&dev_base_lock);
    
    	list_add_tail(&dev->dev_list, &dev_base_head);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	hlist_add_head(&dev->name_hlist, head);
    	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
    	dev_hold(dev);
    	write_unlock_bh(&dev_base_lock);
    
    	/* Notify protocols, that a new device appeared. */
    
    	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	ret = 0;
    
    out:
    	return ret;
    }
    
    /**
     *	register_netdev	- register a network device
     *	@dev: device to register
     *
     *	Take a completed network device structure and add it to the kernel
     *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
     *	chain. 0 is returned on success. A negative errno code is returned
     *	on a failure to set up the device, or if the name is a duplicate.
     *
     *	This is a wrapper around register_netdevice that takes the rtnl semaphore
     *	and expands the device name if you passed a format string to
     *	alloc_netdev.
     */
    int register_netdev(struct net_device *dev)
    {
    	int err;

    	/* register_netdevice() asserts that the rtnl lock is held. */
    	rtnl_lock();

    	/*
    	 * If the name is a format string the caller wants us to do a
    	 * name allocation.
    	 */
    	if (strchr(dev->name, '%')) {
    		err = dev_alloc_name(dev, dev->name);
    		if (err < 0)
    			goto out;
    	}

    	err = register_netdevice(dev);
    out:
    	rtnl_unlock();
    	return err;
    }
    EXPORT_SYMBOL(register_netdev);
    
    /*
     * netdev_wait_allrefs - wait until all references are gone.
     *
     * This is called when unregistering network devices.
     *
     * Any protocol or device that holds a reference should register
     * for netdevice notification, and cleanup and put back the
     * reference if they receive an UNREGISTER event.
     * We can get stuck here if buggy protocols don't correctly
     * call dev_put.
     */
    static void netdev_wait_allrefs(struct net_device *dev)
    {
    	unsigned long rebroadcast_time, warning_time;
    
    	rebroadcast_time = warning_time = jiffies;
    	while (atomic_read(&dev->refcnt) != 0) {
    		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
    
    			rtnl_lock();
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    			/* Rebroadcast unregister notification */
    
    			raw_notifier_call_chain(&netdev_chain,
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    					    NETDEV_UNREGISTER, dev);
    
    			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
    				     &dev->state)) {
    				/* We must not have linkwatch events
    				 * pending on unregister. If this
    				 * happens, we simply run the queue
    				 * unscheduled, resulting in a noop
    				 * for this device.
    				 */
    				linkwatch_run_queue();
    			}
    
    
    			__rtnl_unlock();
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    			rebroadcast_time = jiffies;
    		}
    
    		msleep(250);
    
    		if (time_after(jiffies, warning_time + 10 * HZ)) {
    			printk(KERN_EMERG "unregister_netdevice: "
    			       "waiting for %s to become free. Usage "
    			       "count = %d\n",
    			       dev->name, atomic_read(&dev->refcnt));
    			warning_time = jiffies;
    		}
    	}
    }
    
    /* The sequence is:
     *
     *	rtnl_lock();
     *	...
     *	register_netdevice(x1);
     *	register_netdevice(x2);
     *	...
     *	unregister_netdevice(y1);
     *	unregister_netdevice(y2);
     *      ...
     *	rtnl_unlock();
     *	free_netdev(y1);
     *	free_netdev(y2);
     *
     * We are invoked by rtnl_unlock() after it drops the semaphore.
     * This allows us to deal with problems:
     * 1) We can delete sysfs objects which invoke hotplug
     *    without deadlocking with linkwatch via keventd.
     * 2) Since we run with the RTNL semaphore not held, we can sleep
     *    safely in order to wait for the netdev refcnt to drop to zero.
     */
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    /* Held by netdev_run_todo() for the whole of a todo-list run. */
    static DEFINE_MUTEX(net_todo_run_mutex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    void netdev_run_todo(void)
    {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* Need to guard against multiple cpu's getting out of order. */
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    	mutex_lock(&net_todo_run_mutex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/* Not safe to do outside the semaphore.  We must not return
    	 * until all unregister events invoked by the local processor
    	 * have been completed (either by this todo run, or one on
    	 * another cpu).
    	 */
    	if (list_empty(&net_todo_list))
    		goto out;
    
    	/* Snapshot list, allow later requests */
    	spin_lock(&net_todo_list_lock);
    
    	list_replace_init(&net_todo_list, &list);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	spin_unlock(&net_todo_list_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	while (!list_empty(&list)) {
    		struct net_device *dev
    			= list_entry(list.next, struct net_device, todo_list);
    		list_del(&dev->todo_list);
    
    
    		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
    			printk(KERN_ERR "network todo '%s' but state %d\n",
    			       dev->name, dev->reg_state);
    			dump_stack();
    			continue;
    		}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		dev->reg_state = NETREG_UNREGISTERED;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		/* paranoia */
    		BUG_ON(atomic_read(&dev->refcnt));
    		BUG_TRAP(!dev->ip_ptr);
    		BUG_TRAP(!dev->ip6_ptr);
    		BUG_TRAP(!dev->dn_ptr);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		if (dev->destructor)
    			dev->destructor(dev);
    
    
    		/* Free network device */
    		kobject_put(&dev->dev.kobj);
    
    Arjan van de Ven's avatar
    Arjan van de Ven committed
    	mutex_unlock(&net_todo_run_mutex);
    
    static struct net_device_stats *internal_stats(struct net_device *dev)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /**
     *	alloc_netdev_mq - allocate network device
     *	@sizeof_priv:	size of private data to allocate space for
     *	@name:		device name format string
     *	@setup:		callback to initialize device
     *	@queue_count:	the number of subqueues to allocate
     *
     *	Allocates a struct net_device with private data area for driver use
     *	and performs basic initialization.  Also allocates subqueue structs
     *	for each queue on the device at the end of the netdevice.
     */
    
    struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
    		void (*setup)(struct net_device *), unsigned int queue_count)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	void *p;
    	struct net_device *dev;
    	int alloc_size;
    
    
    	BUG_ON(strlen(name) >= sizeof(dev->name));
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* ensure 32-byte alignment of both the device and private area */
    
    	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
    		     (sizeof(struct net_device_subqueue) * queue_count)) &
    		     ~NETDEV_ALIGN_CONST;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
    
    
    	p = kzalloc(alloc_size, GFP_KERNEL);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (!p) {
    
    		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return NULL;
    	}
    
    	dev = (struct net_device *)
    		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
    	dev->padded = (char *)dev - (char *)p;
    
    
    	if (sizeof_priv) {
    		dev->priv = ((char *)dev +
    			     ((sizeof(struct net_device) +
    			       (sizeof(struct net_device_subqueue) *
    				queue_count) + NETDEV_ALIGN_CONST)
    			      & ~NETDEV_ALIGN_CONST));
    	}
    
    	dev->egress_subqueue_count = queue_count;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	dev->get_stats = internal_stats;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	setup(dev);
    	strcpy(dev->name, name);
    	return dev;
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /**
     *	free_netdev - free network device
     *	@dev: device
     *
     *	This function does the last stage of destroying an allocated device
     *	interface. The reference to the device object is released.
     *	If this is the last reference then it will be freed.
     */
    void free_netdev(struct net_device *dev)
    {
    #ifdef CONFIG_SYSFS
    	/*  Compatibility with error handling in drivers */
    	if (dev->reg_state == NETREG_UNINITIALIZED) {
    		/* dev->padded is the offset back to the allocated pointer. */
    		kfree((char *)dev - dev->padded);
    		return;
    	}

    	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
    	dev->reg_state = NETREG_RELEASED;

    	/* will free via device release */
    	put_device(&dev->dev);
    #else
    	kfree((char *)dev - dev->padded);
    #endif
    }
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    /* Synchronize with packet receive processing. */
    
    void synchronize_net(void)
    {
    	might_sleep();
    	/*
    	 * Wait for an RCU grace period.  Without this call the function
    	 * performed no synchronization at all.
    	 * NOTE(review): restored call; assumes packet receive processing
    	 * runs inside RCU read-side critical sections -- confirm.
    	 */
    	synchronize_rcu();
    }
    
    /**
     *	unregister_netdevice - remove device from the kernel
     *	@dev: device
     *
     *	This function shuts down a device interface and removes it
     *	from the kernel tables. The function is declared void and
     *	returns no value.
     *
     *	Callers must hold the rtnl semaphore.  You may want
     *	unregister_netdev() instead of this.
     */
    
    
    void unregister_netdevice(struct net_device *dev)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	BUG_ON(dev_boot_phase);
    	ASSERT_RTNL();
    
    	/* Some devices call without registering for initialization unwind. */
    	if (dev->reg_state == NETREG_UNINITIALIZED) {
    		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
    				  "was registered\n", dev->name, dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    
    	BUG_ON(dev->reg_state != NETREG_REGISTERED);
    
    	/* If device is running, close it first. */
    	if (dev->flags & IFF_UP)
    		dev_close(dev);
    
    	/* And unlink it from device chain. */
    
    	write_lock_bh(&dev_base_lock);
    	list_del(&dev->dev_list);
    	hlist_del(&dev->name_hlist);
    	hlist_del(&dev->index_hlist);
    	write_unlock_bh(&dev_base_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	dev->reg_state = NETREG_UNREGISTERING;
    
    	synchronize_net();
    
    	/* Shutdown queueing discipline. */
    	dev_shutdown(dev);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* Notify protocols, that we are about to destroy
    	   this device. They should clean all the things.
    	*/
    
    	raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/*
    
    	 *	Flush the unicast and multicast chains
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	 */
    
    	dev_unicast_discard(dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	dev_mc_discard(dev);
    
    	if (dev->uninit)
    		dev->uninit(dev);
    
    	/* Notifier chain MUST detach us from master device. */
    	BUG_TRAP(!dev->master);
    
    
    	/* Remove entries from sysfs */
    	netdev_unregister_sysfs(dev);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* Finish processing unregister after unlock */
    	net_set_todo(dev);
    
    	synchronize_net();
    
    	dev_put(dev);
    }
    
    /**
     *	unregister_netdev - remove device from the kernel
     *	@dev: device
     *
     *	This function shuts down a device interface and removes it
     *	from the kernel tables. The function is declared void and
     *	returns no value.
     *
     *	This is just a wrapper for unregister_netdevice that takes
     *	the rtnl semaphore.  In general you want to use this and not
     *	unregister_netdevice.
     */
    void unregister_netdev(struct net_device *dev)
    {
    	/* unregister_netdevice() asserts that the rtnl lock is held. */
    	rtnl_lock();
    	unregister_netdevice(dev);
    	rtnl_unlock();
    }
    
    EXPORT_SYMBOL(unregister_netdev);
    
    /*
     * CPU notifier: when a CPU has gone offline (CPU_DEAD or CPU_DEAD_FROZEN),
     * move its pending completion, output and input queues onto the CPU this
     * callback runs on.  Always returns NOTIFY_OK.
     */
    static int dev_cpu_callback(struct notifier_block *nfb,
    			    unsigned long action,
    			    void *ocpu)
    {
    	struct sk_buff **list_skb;
    	struct net_device **list_net;
    	struct sk_buff *skb;
    	unsigned int cpu, oldcpu = (unsigned long)ocpu;
    	struct softnet_data *sd, *oldsd;

    	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
    		return NOTIFY_OK;

    	local_irq_disable();
    	cpu = smp_processor_id();
    	sd = &per_cpu(softnet_data, cpu);
    	oldsd = &per_cpu(softnet_data, oldcpu);

    	/* Find end of our completion_queue. */
    	list_skb = &sd->completion_queue;
    	while (*list_skb)
    		list_skb = &(*list_skb)->next;
    	/* Append completion queue from offline CPU. */
    	*list_skb = oldsd->completion_queue;
    	oldsd->completion_queue = NULL;

    	/* Find end of our output_queue. */
    	list_net = &sd->output_queue;
    	while (*list_net)
    		list_net = &(*list_net)->next_sched;
    	/* Append output queue from offline CPU. */
    	*list_net = oldsd->output_queue;
    	oldsd->output_queue = NULL;

    	/* Have the TX softirq process the queues we just took over. */
    	raise_softirq_irqoff(NET_TX_SOFTIRQ);
    	local_irq_enable();

    	/* Process offline CPU's input_pkt_queue */
    	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
    		netif_rx(skb);

    	return NOTIFY_OK;
    }
    
    
    #ifdef CONFIG_NET_DMA
    /**
     * net_dma_rebalance -
     * This is called when the number of channels allocated to the net_dma_client
     * changes.  The net_dma_client tries to have one DMA channel per CPU.
     */
    
    
    static void net_dma_rebalance(struct net_dma *net_dma)
    
    	unsigned int cpu, i, n, chan_idx;
    
    	if (cpus_empty(net_dma->channel_mask)) {
    
    		for_each_online_cpu(cpu)
    
    			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
    
    		return;
    	}
    
    	i = 0;
    	cpu = first_cpu(cpu_online_map);
    
    
    	for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
    		chan = net_dma->channels[chan_idx];
    
    		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
    		   + (i < (num_online_cpus() %
    			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
    
    			per_cpu(softnet_data, cpu).net_dma = chan;
    
    			cpu = next_cpu(cpu, cpu_online_map);
    			n--;
    		}
    		i++;
    	}
    }
    
    /**
     * netdev_dma_event - event callback for the net_dma_client
     * @client: should always be net_dma_client
    
     * @chan: DMA channel for the event
     * @event: event type
    
    static enum dma_state_client
    netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
    	enum dma_state state)
    {
    	int i, found = 0, pos = -1;
    	struct net_dma *net_dma =
    		container_of(client, struct net_dma, client);
    	enum dma_state_client ack = DMA_DUP; /* default: take no action */
    
    	spin_lock(&net_dma->lock);
    	switch (state) {
    	case DMA_RESOURCE_AVAILABLE:
    		for (i = 0; i < NR_CPUS; i++)
    			if (net_dma->channels[i] == chan) {
    				found = 1;
    				break;
    			} else if (net_dma->channels[i] == NULL && pos < 0)
    				pos = i;
    
    		if (!found && pos >= 0) {
    			ack = DMA_ACK;
    			net_dma->channels[pos] = chan;
    			cpu_set(pos, net_dma->channel_mask);
    			net_dma_rebalance(net_dma);
    		}
    
    		break;
    	case DMA_RESOURCE_REMOVED:
    
    		for (i = 0; i < NR_CPUS; i++)
    			if (net_dma->channels[i] == chan) {
    				found = 1;
    				pos = i;
    				break;
    			}
    
    		if (found) {
    			ack = DMA_ACK;
    			cpu_clear(pos, net_dma->channel_mask);
    			net_dma->channels[i] = NULL;
    			net_dma_rebalance(net_dma);
    		}
    
    	spin_unlock(&net_dma->lock);
    
    	return ack;
    
    }
    
    /**
     * netdev_dma_register - register the networking subsystem as a DMA client
     */
    static int __init netdev_dma_register(void)
    {
    
    	/* Prepare net_dma's lock and capability mask before registering. */
    	spin_lock_init(&net_dma.lock);
    	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
    	/* Register the client, then request channels for it. */
    	dma_async_client_register(&net_dma.client);
    	dma_async_client_chan_request(&net_dma.client);
    
    	/* Always reports success. */
    	return 0;
    }
    
    #else
    /* Stub used when CONFIG_NET_DMA is not set: always fails with -ENODEV. */
    static int __init netdev_dma_register(void) { return -ENODEV; }
    #endif /* CONFIG_NET_DMA */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    /*
     *	Initialize the DEV module. At boot time this walks the device list and
     *	unhooks any devices that fail to initialise (normally hardware not
     *	present) and leaves us with a valid list of present and active devices.
     *
     */
    
    /*
     *       This is called single threaded during boot, so no need
     *       to take the rtnl semaphore.
     */
    static int __init net_dev_init(void)
    {
    	int i, rc = -ENOMEM;
    
    	BUG_ON(!dev_boot_phase);
    
    	if (dev_proc_init())
    		goto out;
    
    	if (netdev_sysfs_init())
    		goto out;
    
    	INIT_LIST_HEAD(&ptype_all);
    
    	for (i = 0; i < 16; i++)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		INIT_LIST_HEAD(&ptype_base[i]);
    
    	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
    		INIT_HLIST_HEAD(&dev_name_head[i]);
    
    	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
    		INIT_HLIST_HEAD(&dev_index_head[i]);
    
    	/*
    	 *	Initialise the packet receive queues.
    	 */
    
    
    	for_each_possible_cpu(i) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		struct softnet_data *queue;
    
    		queue = &per_cpu(softnet_data, i);
    		skb_queue_head_init(&queue->input_pkt_queue);
    		queue->completion_queue = NULL;
    		INIT_LIST_HEAD(&queue->poll_list);
    		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
    		queue->backlog_dev.weight = weight_p;
    		queue->backlog_dev.poll = process_backlog;
    		atomic_set(&queue->backlog_dev.refcnt, 1);
    	}
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	dev_boot_phase = 0;