Newer
Older
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
if (ret) {
rollback_registered(dev);
dev->reg_state = NETREG_UNREGISTERED;
}
if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev);
5020
5021
5022
5023
5024
5025
5026
5027
5028
5029
5030
5031
5032
5033
5034
5035
5036
5037
5038
5039
5040
5041
5042
5043
5044
5045
5046
5047
5048
5049
5050
5051
5052
5053
5054
5055
5056
5057
5058
/**
* init_dummy_netdev - init a dummy network device for NAPI
* @dev: device to init
*
* This takes a network device structure and initialize the minimum
* amount of fields so it can be used to schedule NAPI polls without
* registering a full blown interface. This is to be used by drivers
* that need to tie several hardware interfaces to a single NAPI
* poll scheduler due to HW limitations.
*/
int init_dummy_netdev(struct net_device *dev)
{
/* Clear everything. Note we don't initialize spinlocks
* are they aren't supposed to be taken by any of the
* NAPI code and this dummy netdev is supposed to be
* only ever used for NAPI polls
*/
memset(dev, 0, sizeof(struct net_device));
/* make sure we BUG if trying to hit standard
* register/unregister code path
*/
dev->reg_state = NETREG_DUMMY;
/* initialize the ref count */
atomic_set(&dev->refcnt, 1);
/* NAPI wants this */
INIT_LIST_HEAD(&dev->napi_list);
/* a dummy interface is started by default */
set_bit(__LINK_STATE_PRESENT, &dev->state);
set_bit(__LINK_STATE_START, &dev->state);
return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);
/**
* register_netdev - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* This is a wrapper around register_netdevice that takes the rtnl semaphore
* and expands the device name if you passed a format string to
* alloc_netdev.
*/
int register_netdev(struct net_device *dev)
{
int err;
rtnl_lock();
/*
* If the name is a format string the caller wants us to do a
* name allocation.
*/
if (strchr(dev->name, '%')) {
err = dev_alloc_name(dev, dev->name);
if (err < 0)
goto out;
}
err = register_netdevice(dev);
out:
rtnl_unlock();
return err;
}
EXPORT_SYMBOL(register_netdev);
/*
* netdev_wait_allrefs - wait until all references are gone.
*
* This is called when unregistering network devices.
*
* Any protocol or device that holds a reference should register
* for netdevice notification, and cleanup and put back the
* reference if they receive an UNREGISTER event.
* We can get stuck here if buggy protocols don't correctly
*/
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
linkwatch_forget_dev(dev);
rebroadcast_time = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users
* should have already handle it the first time */
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
* pending on unregister. If this
* happens, we simply run the queue
* unscheduled, resulting in a noop
* for this device.
*/
linkwatch_run_queue();
}
5134
5135
5136
5137
5138
5139
5140
5141
5142
5143
5144
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162
5163
rebroadcast_time = jiffies;
}
msleep(250);
if (time_after(jiffies, warning_time + 10 * HZ)) {
printk(KERN_EMERG "unregister_netdevice: "
"waiting for %s to become free. Usage "
"count = %d\n",
dev->name, atomic_read(&dev->refcnt));
warning_time = jiffies;
}
}
}
/* The sequence is:
*
* rtnl_lock();
* ...
* register_netdevice(x1);
* register_netdevice(x2);
* ...
* unregister_netdevice(y1);
* unregister_netdevice(y2);
* ...
* rtnl_unlock();
* free_netdev(y1);
* free_netdev(y2);
*
* 1) We can delete sysfs objects which invoke hotplug
* without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero.
*
* We must not return until all unregister events added during
* the interval the lock was held have been completed.
struct list_head list;
list_replace_init(&net_todo_list, &list);
while (!list_empty(&list)) {
struct net_device *dev
= list_entry(list.next, struct net_device, todo_list);
list_del(&dev->todo_list);
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
printk(KERN_ERR "network todo '%s' but state %d\n",
dev->name, dev->reg_state);
dump_stack();
continue;
}
dev->reg_state = NETREG_UNREGISTERED;
on_each_cpu(flush_backlog, dev, 1);
netdev_wait_allrefs(dev);
/* paranoia */
BUG_ON(atomic_read(&dev->refcnt));
WARN_ON(dev->ip_ptr);
WARN_ON(dev->ip6_ptr);
WARN_ON(dev->dn_ptr);
if (dev->destructor)
dev->destructor(dev);
/* Free network device */
kobject_put(&dev->dev.kobj);
5215
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
5227
5228
5229
5230
5231
5232
5233
5234
5235
5236
5237
5238
5239
5240
/**
* dev_txq_stats_fold - fold tx_queues stats
* @dev: device to get statistics from
* @stats: struct net_device_stats to hold results
*/
void dev_txq_stats_fold(const struct net_device *dev,
struct net_device_stats *stats)
{
unsigned long tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
unsigned int i;
struct netdev_queue *txq;
for (i = 0; i < dev->num_tx_queues; i++) {
txq = netdev_get_tx_queue(dev, i);
tx_bytes += txq->tx_bytes;
tx_packets += txq->tx_packets;
tx_dropped += txq->tx_dropped;
}
if (tx_bytes || tx_packets || tx_dropped) {
stats->tx_bytes = tx_bytes;
stats->tx_packets = tx_packets;
stats->tx_dropped = tx_dropped;
}
}
EXPORT_SYMBOL(dev_txq_stats_fold);
/**
* dev_get_stats - get network device statistics
* @dev: device to get statistics from
*
* Get network statistics from device. The device driver may provide
* its own method by setting dev->netdev_ops->get_stats; otherwise
* the internal statistics structure is used.
*/
const struct net_device_stats *dev_get_stats(struct net_device *dev)
{
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_get_stats)
return ops->ndo_get_stats(dev);
dev_txq_stats_fold(dev, &dev->stats);
return &dev->stats;
static void netdev_init_one_queue(struct net_device *dev,
struct netdev_queue *queue,
void *_unused)
{
queue->dev = dev;
}
static void netdev_init_queues(struct net_device *dev)
{
netdev_init_one_queue(dev, &dev->rx_queue, NULL);
netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
spin_lock_init(&dev->tx_global_lock);
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
* @queue_count: the number of subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subquue structs
* for each queue on the device at the end of the netdevice.
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
alloc_size = sizeof(struct net_device);
if (sizeof_priv) {
/* ensure 32-byte alignment of private area */
alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
alloc_size += sizeof_priv;
}
/* ensure 32-byte alignment of whole construct */
alloc_size += NETDEV_ALIGN - 1;
p = kzalloc(alloc_size, GFP_KERNEL);
printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
if (!tx) {
printk(KERN_ERR "alloc_netdev: Unable to allocate "
"tx qdiscs.\n");
dev = PTR_ALIGN(p, NETDEV_ALIGN);
if (dev_addr_init(dev))
goto free_tx;
dev_net_set(dev, &init_net);
dev->_tx = tx;
dev->num_tx_queues = queue_count;
dev->real_num_tx_queues = queue_count;
dev->gso_max_size = GSO_MAX_SIZE;
netdev_init_queues(dev);
INIT_LIST_HEAD(&dev->napi_list);
INIT_LIST_HEAD(&dev->unreg_list);
INIT_LIST_HEAD(&dev->link_watch_list);
dev->priv_flags = IFF_XMIT_DST_RELEASE;
setup(dev);
strcpy(dev->name, name);
return dev;
free_tx:
kfree(tx);
free_p:
kfree(p);
return NULL;
EXPORT_SYMBOL(alloc_netdev_mq);
/**
* free_netdev - free network device
* @dev: device
*
* This function does the last stage of destroying an allocated device
* interface. The reference to the device object is released.
* If this is the last reference then it will be freed.
*/
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
release_net(dev_net(dev));
/* Flush device addresses */
dev_addr_flush(dev);
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
return;
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
dev->reg_state = NETREG_RELEASED;

Greg Kroah-Hartman
committed
/* will free via device release */
put_device(&dev->dev);
/**
* synchronize_net - Synchronize with packet receive processing
*
* Wait for packets currently being received to be done.
* Does not block later packets from starting.
*/
synchronize_rcu();
* unregister_netdevice_queue - remove device from the kernel
* This function shuts down a device interface and removes it
* If head not NULL, device is queued to be unregistered later.
*
* Callers must hold the rtnl semaphore. You may want
* unregister_netdev() instead of this.
*/
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
list_move_tail(&dev->unreg_list, head);
} else {
rollback_registered(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
}
EXPORT_SYMBOL(unregister_netdevice_queue);
/**
* unregister_netdevice_many - unregister many devices
* @head: list of devices
*/
void unregister_netdevice_many(struct list_head *head)
{
struct net_device *dev;
if (!list_empty(head)) {
rollback_registered_many(head);
list_for_each_entry(dev, head, unreg_list)
net_set_todo(dev);
}
}
EXPORT_SYMBOL(unregister_netdevice_many);
/**
* unregister_netdev - remove device from the kernel
* @dev: device
*
* This function shuts down a device interface and removes it
*
* This is just a wrapper for unregister_netdevice that takes
* the rtnl semaphore. In general you want to use this and not
* unregister_netdevice.
*/
void unregister_netdev(struct net_device *dev)
{
rtnl_lock();
unregister_netdevice(dev);
rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
5464
5465
5466
5467
5468
5469
5470
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482
5483
5484
5485
5486
5487
5488
/**
* dev_change_net_namespace - move device to different nethost namespace
* @dev: device
* @net: network namespace
* @pat: If not NULL name pattern to try if the current device name
* is already taken in the destination network namespace.
*
* This function shuts down a device interface and moves it
* to a new network namespace. On success 0 is returned, on
* a failure a netagive errno code is returned.
*
* Callers must hold the rtnl semaphore.
*/
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
int err;
ASSERT_RTNL();
/* Don't allow namespace local devices to be moved. */
err = -EINVAL;
if (dev->features & NETIF_F_NETNS_LOCAL)
goto out;
#ifdef CONFIG_SYSFS
/* Don't allow real devices to be moved when sysfs
* is enabled.
*/
err = -EINVAL;
if (dev->dev.parent)
goto out;
#endif
/* Ensure the device has been registrered */
err = -EINVAL;
if (dev->reg_state != NETREG_REGISTERED)
goto out;
/* Get out if there is nothing todo */
err = 0;
if (net_eq(dev_net(dev), net))
goto out;
/* Pick the destination device name, and ensure
* we can use it in the destination network namespace.
*/
err = -EEXIST;
if (__dev_get_by_name(net, dev->name)) {
/* We get here if we can't use the current device name */
if (!pat)
goto out;
if (dev_get_valid_name(net, pat, dev->name, 1))
goto out;
}
/*
* And now a mini version of register_netdevice unregister_netdevice.
*/
/* If device is running close it first. */
dev_close(dev);
/* And unlink it from device chain */
err = -ENODEV;
unlist_netdevice(dev);
synchronize_net();
/* Shutdown queueing discipline. */
dev_shutdown(dev);
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
/*
* Flush the unicast and multicast chains
*/
dev_addr_discard(dev);
netdev_unregister_kobject(dev);
/* Actually switch the network namespace */
dev_net_set(dev, net);
/* If there is an ifindex conflict assign a new one */
if (__dev_get_by_index(net, dev->ifindex)) {
int iflink = (dev->iflink == dev->ifindex);
dev->ifindex = dev_new_index(net);
if (iflink)
dev->iflink = dev->ifindex;
}
err = netdev_register_kobject(dev);
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
call_netdevice_notifiers(NETDEV_REGISTER, dev);
synchronize_net();
err = 0;
out:
return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
static int dev_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *ocpu)
{
struct sk_buff **list_skb;
struct Qdisc **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5589
5590
5591
5592
5593
5594
5595
5596
5597
5598
5599
5600
5601
5602
5603
5604
5605
5606
5607
5608
5609
5610
5611
5612
5613
5614
5615
5616
5617
5618
5619
5620
5621
5622
return NOTIFY_OK;
local_irq_disable();
cpu = smp_processor_id();
sd = &per_cpu(softnet_data, cpu);
oldsd = &per_cpu(softnet_data, oldcpu);
/* Find end of our completion_queue. */
list_skb = &sd->completion_queue;
while (*list_skb)
list_skb = &(*list_skb)->next;
/* Append completion queue from offline CPU. */
*list_skb = oldsd->completion_queue;
oldsd->completion_queue = NULL;
/* Find end of our output_queue. */
list_net = &sd->output_queue;
while (*list_net)
list_net = &(*list_net)->next_sched;
/* Append output queue from offline CPU. */
*list_net = oldsd->output_queue;
oldsd->output_queue = NULL;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
netif_rx(skb);
return NOTIFY_OK;
}
* netdev_increment_features - increment feature set by one
* @all: current feature set
* @one: new feature set
* @mask: mask feature set
*
* Computes a new feature set after adding a device with feature set
* @one to the master device with current feature set @all. Will not
* enable anything that is off in @mask. Returns the new feature set.
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
unsigned long mask)
{
/* If device needs checksumming, downgrade to it. */
if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
else if (mask & NETIF_F_ALL_CSUM) {
/* If one device supports v4/v6 checksumming, set for all. */
if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
!(all & NETIF_F_GEN_CSUM)) {
all &= ~NETIF_F_ALL_CSUM;
all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
}
/* If one device supports hw checksumming, set for all. */
if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
all &= ~NETIF_F_ALL_CSUM;
all |= NETIF_F_HW_CSUM;
}
}
one |= all & NETIF_F_ONE_FOR_ALL;
all &= one | NETIF_F_LLTX | NETIF_F_GSO | NETIF_F_UFO;
all |= one & mask & NETIF_F_ONE_FOR_ALL;
return all;
}
EXPORT_SYMBOL(netdev_increment_features);
static struct hlist_head *netdev_create_hash(void)
{
int i;
struct hlist_head *hash;
hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
if (hash != NULL)
for (i = 0; i < NETDEV_HASHENTRIES; i++)
INIT_HLIST_HEAD(&hash[i]);
return hash;
}
/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)
goto err_name;
net->dev_index_head = netdev_create_hash();
if (net->dev_index_head == NULL)
goto err_idx;
return 0;
err_idx:
kfree(net->dev_name_head);
err_name:
return -ENOMEM;
}
/**
* netdev_drivername - network driver for the device
* @dev: network device
* @buffer: buffer for resulting name
* @len: size of buffer
*
* Determine network driver for device.
*/
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
const struct device_driver *driver;
const struct device *parent;
if (len <= 0 || !buffer)
return buffer;
buffer[0] = 0;
parent = dev->dev.parent;
if (!parent)
return buffer;
driver = parent->driver;
if (driver && driver->name)
strlcpy(buffer, driver->name, len);
return buffer;
}
static void __net_exit netdev_exit(struct net *net)
{
kfree(net->dev_name_head);
kfree(net->dev_index_head);
}
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
};
static void __net_exit default_device_exit(struct net *net)
struct net_device *dev, *aux;
* Push all migratable network devices back to the
* initial network namespace
*/
rtnl_lock();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
/* Ignore unmoveable devices (i.e. loopback) */
if (dev->features & NETIF_F_NETNS_LOCAL)
continue;
/* Leave virtual devices for the generic cleanup */
if (dev->rtnl_link_ops)
continue;
/* Push remaing network devices to init_net */
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
__func__, dev->name, err);
}
}
rtnl_unlock();
}
5769
5770
5771
5772
5773
5774
5775
5776
5777
5778
5779
5780
5781
5782
5783
5784
5785
5786
5787
5788
5789
5790
5791
5792
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
/* At exit all network devices most be removed from a network
* namespace. Do this in the reverse order of registeration.
* Do this across as many network namespaces as possible to
* improve batching efficiency.
*/
struct net_device *dev;
struct net *net;
LIST_HEAD(dev_kill_list);
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops)
dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
else
unregister_netdevice_queue(dev, &dev_kill_list);
}
}
unregister_netdevice_many(&dev_kill_list);
rtnl_unlock();
}
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
* present) and leaves us with a valid list of present and active devices.
*
*/
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
*/
static int __init net_dev_init(void)
{
int i, rc = -ENOMEM;
BUG_ON(!dev_boot_phase);
if (dev_proc_init())
goto out;
for (i = 0; i < PTYPE_HASH_SIZE; i++)
if (register_pernet_subsys(&netdev_net_ops))
goto out;
/*
* Initialise the packet receive queues.
*/
for_each_possible_cpu(i) {
struct softnet_data *queue;
queue = &per_cpu(softnet_data, i);
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
queue->backlog.gro_count = 0;
/* The loopback device is special if any other network devices
* is present in a network namespace the loopback device must
* be present. Since we now dynamically allocate and free the
* loopback device ensure this invariant is maintained by
* keeping the loopback device as the first device on the
* list of network devices. Ensuring the loopback devices
* is the first device that appears and the last network device
* that disappears.
*/
if (register_pernet_device(&loopback_net_ops))
goto out;
if (register_pernet_device(&default_device_ops))
goto out;
open_softirq(NET_TX_SOFTIRQ, net_tx_action);
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
dst_init();
dev_mcast_init();
rc = 0;
out:
return rc;
}
subsys_initcall(net_dev_init);
static int __init initialize_hashrnd(void)
{
get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
return 0;
}
late_initcall_sync(initialize_hashrnd);