Newer
Older
/*
* netdev_wait_allrefs - wait until all references are gone.
*
* This is called when unregistering network devices.
*
* Any protocol or device that holds a reference should register
* for netdevice notification, and cleanup and put back the
* reference if they receive an UNREGISTER event.
* We can get stuck here if buggy protocols don't correctly
*/
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
rebroadcast_time = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
* pending on unregister. If this
* happens, we simply run the queue
* unscheduled, resulting in a noop
* for this device.
*/
linkwatch_run_queue();
}
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
5059
5060
5061
5062
5063
5064
5065
rebroadcast_time = jiffies;
}
msleep(250);
if (time_after(jiffies, warning_time + 10 * HZ)) {
printk(KERN_EMERG "unregister_netdevice: "
"waiting for %s to become free. Usage "
"count = %d\n",
dev->name, atomic_read(&dev->refcnt));
warning_time = jiffies;
}
}
}
/* The sequence is:
*
* rtnl_lock();
* ...
* register_netdevice(x1);
* register_netdevice(x2);
* ...
* unregister_netdevice(y1);
* unregister_netdevice(y2);
* ...
* rtnl_unlock();
* free_netdev(y1);
* free_netdev(y2);
*
* 1) We can delete sysfs objects which invoke hotplug
* without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero.
*
* We must not return until all unregister events added during
* the interval the lock was held have been completed.
struct list_head list;
list_replace_init(&net_todo_list, &list);
while (!list_empty(&list)) {
struct net_device *dev
= list_entry(list.next, struct net_device, todo_list);
list_del(&dev->todo_list);
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
printk(KERN_ERR "network todo '%s' but state %d\n",
dev->name, dev->reg_state);
dump_stack();
continue;
}
dev->reg_state = NETREG_UNREGISTERED;
on_each_cpu(flush_backlog, dev, 1);
netdev_wait_allrefs(dev);
/* paranoia */
BUG_ON(atomic_read(&dev->refcnt));
WARN_ON(dev->ip_ptr);
WARN_ON(dev->ip6_ptr);
WARN_ON(dev->dn_ptr);
if (dev->destructor)
dev->destructor(dev);
/* Free network device */
kobject_put(&dev->dev.kobj);
/**
* dev_get_stats - get network device statistics
* @dev: device to get statistics from
*
* Get network statistics from device. The device driver may provide
* its own method by setting dev->netdev_ops->get_stats; otherwise
* the internal statistics structure is used.
*/
const struct net_device_stats *dev_get_stats(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_get_stats)
return ops->ndo_get_stats(dev);
else {
unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
struct net_device_stats *stats = &dev->stats;
unsigned int i;
struct netdev_queue *txq;
for (i = 0; i < dev->num_tx_queues; i++) {
txq = netdev_get_tx_queue(dev, i);
tx_bytes += txq->tx_bytes;
tx_packets += txq->tx_packets;
tx_dropped += txq->tx_dropped;
}
if (tx_bytes || tx_packets || tx_dropped) {
stats->tx_bytes = tx_bytes;
stats->tx_packets = tx_packets;
stats->tx_dropped = tx_dropped;
}
return stats;
}
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
queue->dev = dev;
}
static void netdev_init_queues(struct net_device *dev)
{
netdev_init_one_queue(dev, &dev->rx_queue, NULL);
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
spin_lock_init(&dev->tx_global_lock);
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
* @queue_count: the number of subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subquue structs
* for each queue on the device at the end of the netdevice.
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
alloc_size += sizeof_priv;
}
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
p = kzalloc(alloc_size, GFP_KERNEL);
printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
if (!tx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
"tx qdiscs.\n");
dev = PTR_ALIGN(p, NETDEV_ALIGN);
if (dev_addr_init(dev))
goto free_tx;
dev_net_set(dev, &init_net);
dev->_tx = tx;
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
INIT_LIST_HEAD(&dev->napi_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
return dev;
free_tx:
kfree(tx);
free_p:
kfree(p);
return NULL;
EXPORT_SYMBOL(alloc_netdev_mq);
/**
* free_netdev - free network device
* @dev: device
*
* This function does the last stage of destroying an allocated device
* interface. The reference to the device object is released.
* If this is the last reference then it will be freed.
*/
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
release_net(dev_net(dev));
/* Flush device addresses */
dev_addr_flush(dev);
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
return;
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
dev->reg_state = NETREG_RELEASED;

Greg Kroah-Hartman
committed
/* will free via device release */
put_device(&dev->dev);
/**
* synchronize_net - Synchronize with packet receive processing
*
* Wait for packets currently being received to be done.
* Does not block later packets from starting.
*/
synchronize_rcu();
* unregister_netdevice_queue - remove device from the kernel
* This function shuts down a device interface and removes it
* If head not NULL, device is queued to be unregistered later.
*
* Callers must hold the rtnl semaphore. You may want
* unregister_netdev() instead of this.
*/
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
if (head) {
list_add_tail(&dev->unreg_list, head);
} else {
rollback_registered(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
}
EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
*
*/
void unregister_netdevice_many(struct list_head *head)
{
struct net_device *dev;
if (!list_empty(head)) {
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
/**
* unregister_netdev - remove device from the kernel
* @dev: device
*
* This function shuts down a device interface and removes it
*
* This is just a wrapper for unregister_netdevice that takes
* the rtnl semaphore. In general you want to use this and not
* unregister_netdevice.
*/
void unregister_netdev(struct net_device *dev)
{
rtnl_lock();
unregister_netdevice(dev);
rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
5355
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
/**
* dev_change_net_namespace - move device to different nethost namespace
* @dev: device
* @net: network namespace
* @pat: If not NULL name pattern to try if the current device name
* is already taken in the destination network namespace.
*
* This function shuts down a device interface and moves it
* to a new network namespace. On success 0 is returned, on
* a failure a netagive errno code is returned.
*
* Callers must hold the rtnl semaphore.
*/
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
char buf[IFNAMSIZ];
const char *destname;
int err;
ASSERT_RTNL();
/* Don't allow namespace local devices to be moved. */
err = -EINVAL;
if (dev->features & NETIF_F_NETNS_LOCAL)
goto out;
#ifdef CONFIG_SYSFS
/* Don't allow real devices to be moved when sysfs
* is enabled.
*/
err = -EINVAL;
if (dev->dev.parent)
goto out;
#endif
/* Ensure the device has been registrered */
err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
goto out;
/* Get out if there is nothing todo */
err = 0;
if (net_eq(dev_net(dev), net))
5399
5400
5401
5402
5403
5404
5405
5406
5407
5408
5409
5410
5411
5412
5413
5414
5415
5416
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
goto out;
/* Pick the destination device name, and ensure
* we can use it in the destination network namespace.
*/
err = -EEXIST;
destname = dev->name;
if (__dev_get_by_name(net, destname)) {
/* We get here if we can't use the current device name */
if (!pat)
goto out;
if (!dev_valid_name(pat))
goto out;
if (strchr(pat, '%')) {
if (__dev_alloc_name(net, pat, buf) < 0)
goto out;
destname = buf;
} else
destname = pat;
if (__dev_get_by_name(net, destname))
goto out;
}
/*
* And now a mini version of register_netdevice unregister_netdevice.
*/
/* If device is running close it first. */
dev_close(dev);
/* And unlink it from device chain */
err = -ENODEV;
unlist_netdevice(dev);
synchronize_net();
/* Shutdown queueing discipline. */
dev_shutdown(dev);
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
/*
* Flush the unicast and multicast chains
*/
dev_addr_discard(dev);
netdev_unregister_kobject(dev);
/* Actually switch the network namespace */
dev_net_set(dev, net);
/* Assign the new device name */
if (destname != dev->name)
strcpy(dev->name, destname);
/* If there is an ifindex conflict assign a new one */
if (__dev_get_by_index(net, dev->ifindex)) {
int iflink = (dev->iflink == dev->ifindex);
dev->ifindex = dev_new_index(net);
if (iflink)
dev->iflink = dev->ifindex;
}
err = netdev_register_kobject(dev);
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
call_netdevice_notifiers(NETDEV_REGISTER, dev);
synchronize_net();
err = 0;
out:
return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
static int dev_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *ocpu)
{
struct sk_buff **list_skb;
struct Qdisc **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5494
5495
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506
5507
5508
5509
5510
5511
5512
5513
5514
5515
5516
5517
5518
5519
5520
5521
5522
5523
5524
5525
5526
5527
return NOTIFY_OK;
local_irq_disable();
cpu = smp_processor_id();
sd = &per_cpu(softnet_data, cpu);
oldsd = &per_cpu(softnet_data, oldcpu);
/* Find end of our completion_queue. */
list_skb = &sd->completion_queue;
while (*list_skb)
list_skb = &(*list_skb)->next;
/* Append completion queue from offline CPU. */
*list_skb = oldsd->completion_queue;
oldsd->completion_queue = NULL;
/* Find end of our output_queue. */
list_net = &sd->output_queue;
while (*list_net)
list_net = &(*list_net)->next_sched;
/* Append output queue from offline CPU. */
*list_net = oldsd->output_queue;
oldsd->output_queue = NULL;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
netif_rx(skb);
return NOTIFY_OK;
}
* netdev_increment_features - increment feature set by one
* @all: current feature set
* @one: new feature set
* @mask: mask feature set
*
* Computes a new feature set after adding a device with feature set
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
unsigned long mask)
{
/* If device needs checksumming, downgrade to it. */
if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
else if (mask & NETIF_F_ALL_CSUM) {
/* If one device supports v4/v6 checksumming, set for all. */
if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
!(all & NETIF_F_GEN_CSUM)) {
all &= ~NETIF_F_ALL_CSUM;
all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
}
/* If one device supports hw checksumming, set for all. */
if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
all &= ~NETIF_F_ALL_CSUM;
all |= NETIF_F_HW_CSUM;
}
}
one |= all & NETIF_F_ONE_FOR_ALL;
all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
all |= one & mask & NETIF_F_ONE_FOR_ALL;
return all;
}
EXPORT_SYMBOL(netdev_increment_features);
static struct hlist_head *netdev_create_hash(void)
{
int i;
struct hlist_head *hash;
hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
if (hash != NULL)
for (i = 0; i < NETDEV_HASHENTRIES; i++)
INIT_HLIST_HEAD(&hash[i]);
return hash;
}
/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
goto err_name;
net->dev_index_head = netdev_create_hash();
if (net->dev_index_head == NULL)
goto err_idx;
return 0;
err_idx:
kfree(net->dev_name_head);
err_name:
return -ENOMEM;
}
/**
* netdev_drivername - network driver for the device
* @dev: network device
* @buffer: buffer for resulting name
* @len: size of buffer
*
* Determine network driver for device.
*/
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
const struct device_driver *driver;
const struct device *parent;
if (len <= 0 || !buffer)
return buffer;
buffer[0] = 0;
parent = dev->dev.parent;
if (!parent)
return buffer;
driver = parent->driver;
if (driver && driver->name)
strlcpy(buffer, driver->name, len);
return buffer;
}
static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
}
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
};
static void __net_exit default_device_exit(struct net *net)
struct net_device *dev;
/*
* Push all migratable of the network devices back to the
* initial network namespace
*/
rtnl_lock();
restart:
for_each_netdev(net, dev) {
int err;
char fb_name[IFNAMSIZ];
/* Ignore unmoveable devices (i.e. loopback) */
if (dev->features & NETIF_F_NETNS_LOCAL)
continue;
/* Delete virtual devices */
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
dev->rtnl_link_ops->dellink(dev, NULL);
goto restart;
/* Push remaing network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
__func__, dev->name, err);
goto restart;
}
rtnl_unlock();
}
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
};
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
* present) and leaves us with a valid list of present and active devices.
*
*/
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
*/
static int __init net_dev_init(void)
{
int i, rc = -ENOMEM;
BUG_ON(!dev_boot_phase);
if (dev_proc_init())
goto out;
for (i = 0; i < PTYPE_HASH_SIZE; i++)
if (register_pernet_subsys(&netdev_net_ops))
goto out;
/*
* Initialise the packet receive queues.
*/
for_each_possible_cpu(i) {
struct softnet_data *queue;
queue = &per_cpu(softnet_data, i);
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
queue->backlog.gro_count = 0;
/* The loopback device is special if any other network devices
* is present in a network namespace the loopback device must
* be present. Since we now dynamically allocate and free the
* loopback device ensure this invariant is maintained by
* keeping the loopback device as the first device on the
* list of network devices. Ensuring the loopback devices
* is the first device that appears and the last network device
* that disappears.
*/
if (register_pernet_device(&loopback_net_ops))
goto out;
if (register_pernet_device(&default_device_ops))
goto out;
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
dst_init();
dev_mcast_init();
rc = 0;
out:
return rc;
}
subsys_initcall(net_dev_init);
static int __init initialize_hashrnd(void)
{
get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
return 0;
}
late_initcall_sync(initialize_hashrnd);