Newer
Older
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
case SIOCSIFMAP:
if (dev->set_config) {
if (!netif_device_present(dev))
return -ENODEV;
return dev->set_config(dev, &ifr->ifr_map);
}
return -EOPNOTSUPP;
case SIOCADDMULTI:
if (!dev->set_multicast_list ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
dev->addr_len, 1);
case SIOCDELMULTI:
if (!dev->set_multicast_list ||
ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
dev->addr_len, 1);
case SIOCGIFINDEX:
ifr->ifr_ifindex = dev->ifindex;
return 0;
case SIOCGIFTXQLEN:
ifr->ifr_qlen = dev->tx_queue_len;
return 0;
case SIOCSIFTXQLEN:
if (ifr->ifr_qlen < 0)
return -EINVAL;
dev->tx_queue_len = ifr->ifr_qlen;
return 0;
case SIOCSIFNAME:
ifr->ifr_newname[IFNAMSIZ-1] = '\0';
return dev_change_name(dev, ifr->ifr_newname);
/*
* Unknown or private ioctl
*/
default:
if ((cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15) ||
cmd == SIOCBONDENSLAVE ||
cmd == SIOCBONDRELEASE ||
cmd == SIOCBONDSETHWADDR ||
cmd == SIOCBONDSLAVEINFOQUERY ||
cmd == SIOCBONDINFOQUERY ||
cmd == SIOCBONDCHANGEACTIVE ||
cmd == SIOCGMIIPHY ||
cmd == SIOCGMIIREG ||
cmd == SIOCSMIIREG ||
cmd == SIOCBRADDIF ||
cmd == SIOCBRDELIF ||
cmd == SIOCWANDEV) {
err = -EOPNOTSUPP;
if (dev->do_ioctl) {
if (netif_device_present(dev))
err = dev->do_ioctl(dev, ifr,
cmd);
else
err = -ENODEV;
}
} else
err = -EINVAL;
}
return err;
}
/*
* This function handles all "interface"-type I/O control requests. The actual
* 'doing' part of this is dev_ifsioc above.
*/
/**
* dev_ioctl - network device ioctl
* @cmd: command to issue
* @arg: pointer to a struct ifreq in user space
*
* Issue ioctl functions to devices. This is normally called by the
* user space syscall interfaces but can sometimes be useful for
* other purposes. The return value is the return from the syscall if
* positive or a negative errno code on error.
*/
int dev_ioctl(unsigned int cmd, void __user *arg)
{
struct ifreq ifr;
int ret;
char *colon;
/* One special case: SIOCGIFCONF takes ifconf argument
and requires shared lock, because it sleeps writing
to user space.
*/
if (cmd == SIOCGIFCONF) {
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
return ret;
}
if (cmd == SIOCGIFNAME)
return dev_ifname((struct ifreq __user *)arg);
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
ifr.ifr_name[IFNAMSIZ-1] = 0;
colon = strchr(ifr.ifr_name, ':');
if (colon)
*colon = 0;
/*
* See which interface the caller is talking about.
*/
switch (cmd) {
/*
* These ioctl calls:
* - can be done by all.
* - atomic and do not require locking.
* - return a value
*/
case SIOCGIFFLAGS:
case SIOCGIFMETRIC:
case SIOCGIFMTU:
case SIOCGIFHWADDR:
case SIOCGIFSLAVE:
case SIOCGIFMAP:
case SIOCGIFINDEX:
case SIOCGIFTXQLEN:
dev_load(ifr.ifr_name);
read_lock(&dev_base_lock);
ret = dev_ifsioc(&ifr, cmd);
read_unlock(&dev_base_lock);
if (!ret) {
if (colon)
*colon = ':';
if (copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
ret = -EFAULT;
}
return ret;
case SIOCETHTOOL:
dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ethtool(&ifr);
rtnl_unlock();
if (!ret) {
if (colon)
*colon = ':';
if (copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
ret = -EFAULT;
}
return ret;
/*
* These ioctl calls:
* - require superuser power.
* - require strict serialization.
* - return a value
*/
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSIFNAME:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
if (!ret) {
if (colon)
*colon = ':';
if (copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
ret = -EFAULT;
}
return ret;
/*
* These ioctl calls:
* - require superuser power.
* - require strict serialization.
* - do not return a value
*/
case SIOCSIFFLAGS:
case SIOCSIFMETRIC:
case SIOCSIFMTU:
case SIOCSIFMAP:
case SIOCSIFHWADDR:
case SIOCSIFSLAVE:
case SIOCADDMULTI:
case SIOCDELMULTI:
case SIOCSIFHWBROADCAST:
case SIOCSIFTXQLEN:
case SIOCSMIIREG:
case SIOCBONDENSLAVE:
case SIOCBONDRELEASE:
case SIOCBONDSETHWADDR:
case SIOCBONDCHANGEACTIVE:
case SIOCBRADDIF:
case SIOCBRDELIF:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* fall through */
case SIOCBONDSLAVEINFOQUERY:
case SIOCBONDINFOQUERY:
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
return ret;
case SIOCGIFMEM:
/* Get the per device memory space. We can add this but
* currently do not support it */
case SIOCSIFMEM:
/* Set the per device memory buffer space.
* Not applicable in our case */
case SIOCSIFLINK:
return -EINVAL;
/*
* Unknown or private ioctl.
*/
default:
if (cmd == SIOCWANDEV ||
(cmd >= SIOCDEVPRIVATE &&
cmd <= SIOCDEVPRIVATE + 15)) {
dev_load(ifr.ifr_name);
rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
rtnl_unlock();
if (!ret && copy_to_user(arg, &ifr,
sizeof(struct ifreq)))
ret = -EFAULT;
return ret;
}
/* Take care of Wireless Extensions */
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
return wext_handle_ioctl(&ifr, cmd, arg);
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
return -EINVAL;
}
}
/**
* dev_new_index - allocate an ifindex
*
* Returns a suitable unique value for a new device interface
* number. The caller must hold the rtnl semaphore or the
* dev_base_lock to be sure it remains unique.
*/
static int dev_new_index(void)
{
static int ifindex;
for (;;) {
if (++ifindex <= 0)
ifindex = 1;
if (!__dev_get_by_index(ifindex))
return ifindex;
}
}
static int dev_boot_phase = 1;
/* Delayed registration/unregisteration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
static void net_set_todo(struct net_device *dev)
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
{
spin_lock(&net_todo_list_lock);
list_add_tail(&dev->todo_list, &net_todo_list);
spin_unlock(&net_todo_list_lock);
}
/**
* register_netdevice - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* Callers must hold the rtnl semaphore. You may want
* register_netdev() instead of this.
*
* BUGS:
* The locking appears insufficient to guarantee two parallel registers
* will not get the same name.
*/
int register_netdevice(struct net_device *dev)
{
struct hlist_head *head;
struct hlist_node *p;
int ret;
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
might_sleep();
/* When net_device's are persistent, this will be fatal. */
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
spin_lock_init(&dev->queue_lock);
netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
dev->xmit_lock_owner = -1;
spin_lock_init(&dev->ingress_lock);
dev->iflink = -1;
/* Init, if this function is available */
if (dev->init) {
ret = dev->init(dev);
if (ret) {
if (ret > 0)
ret = -EIO;
if (!dev_valid_name(dev->name)) {
ret = -EINVAL;
}
dev->ifindex = dev_new_index();
if (dev->iflink == -1)
dev->iflink = dev->ifindex;
/* Check for existence of name */
head = dev_name_hash(dev->name);
hlist_for_each(p, head) {
struct net_device *d
= hlist_entry(p, struct net_device, name_hlist);
if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
ret = -EEXIST;
/* Fix illegal checksum combinations */
if ((dev->features & NETIF_F_HW_CSUM) &&
(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
dev->name);
dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
}
if ((dev->features & NETIF_F_NO_CSUM) &&
(dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
dev->name);
dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
}
/* Fix illegal SG+CSUM combinations. */
if ((dev->features & NETIF_F_SG) &&
!(dev->features & NETIF_F_ALL_CSUM)) {
printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
dev->name);
dev->features &= ~NETIF_F_SG;
}
/* TSO requires that SG is present as well. */
if ((dev->features & NETIF_F_TSO) &&
!(dev->features & NETIF_F_SG)) {
printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
dev->name);
dev->features &= ~NETIF_F_TSO;
}
if (dev->features & NETIF_F_UFO) {
if (!(dev->features & NETIF_F_HW_CSUM)) {
printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
"NETIF_F_HW_CSUM feature.\n",
dev->name);
dev->features &= ~NETIF_F_UFO;
}
if (!(dev->features & NETIF_F_SG)) {
printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
"NETIF_F_SG feature.\n",
dev->name);
dev->features &= ~NETIF_F_UFO;
}
}
/*
* nil rebuild_header routine,
* that should be never called and used as just bug trap.
*/
if (!dev->rebuild_header)
dev->rebuild_header = default_rebuild_header;
ret = netdev_register_sysfs(dev);
if (ret)
dev->reg_state = NETREG_REGISTERED;
/*
* Default initial state at registry is that the
* device is present.
*/
set_bit(__LINK_STATE_PRESENT, &dev->state);
dev_init_scheduler(dev);
write_lock_bh(&dev_base_lock);
list_add_tail(&dev->dev_list, &dev_base_head);
hlist_add_head(&dev->name_hlist, head);
hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
dev_hold(dev);
write_unlock_bh(&dev_base_lock);
/* Notify protocols, that a new device appeared. */
raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
ret = 0;
out:
return ret;
}
/**
* register_netdev - register a network device
* @dev: device to register
*
* Take a completed network device structure and add it to the kernel
* interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
* chain. 0 is returned on success. A negative errno code is returned
* on a failure to set up the device, or if the name is a duplicate.
*
* This is a wrapper around register_netdevice that takes the rtnl semaphore
* and expands the device name if you passed a format string to
* alloc_netdev.
*/
int register_netdev(struct net_device *dev)
{
int err;
rtnl_lock();
/*
* If the name is a format string the caller wants us to do a
* name allocation.
*/
if (strchr(dev->name, '%')) {
err = dev_alloc_name(dev, dev->name);
if (err < 0)
goto out;
}
err = register_netdevice(dev);
out:
rtnl_unlock();
return err;
}
EXPORT_SYMBOL(register_netdev);
/*
* netdev_wait_allrefs - wait until all references are gone.
*
* This is called when unregistering network devices.
*
* Any protocol or device that holds a reference should register
* for netdevice notification, and cleanup and put back the
* reference if they receive an UNREGISTER event.
* We can get stuck here if buggy protocols don't correctly
*/
static void netdev_wait_allrefs(struct net_device *dev)
{
unsigned long rebroadcast_time, warning_time;
rebroadcast_time = warning_time = jiffies;
while (atomic_read(&dev->refcnt) != 0) {
if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
raw_notifier_call_chain(&netdev_chain,
NETDEV_UNREGISTER, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) {
/* We must not have linkwatch events
* pending on unregister. If this
* happens, we simply run the queue
* unscheduled, resulting in a noop
* for this device.
*/
linkwatch_run_queue();
}
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
rebroadcast_time = jiffies;
}
msleep(250);
if (time_after(jiffies, warning_time + 10 * HZ)) {
printk(KERN_EMERG "unregister_netdevice: "
"waiting for %s to become free. Usage "
"count = %d\n",
dev->name, atomic_read(&dev->refcnt));
warning_time = jiffies;
}
}
}
/* The sequence is:
*
* rtnl_lock();
* ...
* register_netdevice(x1);
* register_netdevice(x2);
* ...
* unregister_netdevice(y1);
* unregister_netdevice(y2);
* ...
* rtnl_unlock();
* free_netdev(y1);
* free_netdev(y2);
*
* We are invoked by rtnl_unlock() after it drops the semaphore.
* This allows us to deal with problems:
* 1) We can delete sysfs objects which invoke hotplug
* without deadlocking with linkwatch via keventd.
* 2) Since we run with the RTNL semaphore not held, we can sleep
* safely in order to wait for the netdev refcnt to drop to zero.
*/
struct list_head list;
/* Need to guard against multiple cpu's getting out of order. */
/* Not safe to do outside the semaphore. We must not return
* until all unregister events invoked by the local processor
* have been completed (either by this todo run, or one on
* another cpu).
*/
if (list_empty(&net_todo_list))
goto out;
/* Snapshot list, allow later requests */
spin_lock(&net_todo_list_lock);
list_replace_init(&net_todo_list, &list);
while (!list_empty(&list)) {
struct net_device *dev
= list_entry(list.next, struct net_device, todo_list);
list_del(&dev->todo_list);
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
printk(KERN_ERR "network todo '%s' but state %d\n",
dev->name, dev->reg_state);
dump_stack();
continue;
}
dev->reg_state = NETREG_UNREGISTERED;
netdev_wait_allrefs(dev);
/* paranoia */
BUG_ON(atomic_read(&dev->refcnt));
BUG_TRAP(!dev->ip_ptr);
BUG_TRAP(!dev->ip6_ptr);
BUG_TRAP(!dev->dn_ptr);
if (dev->destructor)
dev->destructor(dev);
/* Free network device */
kobject_put(&dev->dev.kobj);
static struct net_device_stats *internal_stats(struct net_device *dev)
return &dev->stats;
* alloc_netdev_mq - allocate network device
* @sizeof_priv: size of private data to allocate space for
* @name: device name format string
* @setup: callback to initialize device
* @queue_count: the number of subqueues to allocate
*
* Allocates a struct net_device with private data area for driver use
* and performs basic initialization. Also allocates subquue structs
* for each queue on the device at the end of the netdevice.
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
void (*setup)(struct net_device *), unsigned int queue_count)
{
void *p;
struct net_device *dev;
int alloc_size;
BUG_ON(strlen(name) >= sizeof(dev->name));
/* ensure 32-byte alignment of both the device and private area */
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
(sizeof(struct net_device_subqueue) * queue_count)) &
~NETDEV_ALIGN_CONST;
p = kzalloc(alloc_size, GFP_KERNEL);
printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
return NULL;
}
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
dev->padded = (char *)dev - (char *)p;
if (sizeof_priv) {
dev->priv = ((char *)dev +
((sizeof(struct net_device) +
(sizeof(struct net_device_subqueue) *
queue_count) + NETDEV_ALIGN_CONST)
& ~NETDEV_ALIGN_CONST));
}
dev->egress_subqueue_count = queue_count;
dev->get_stats = internal_stats;
setup(dev);
strcpy(dev->name, name);
return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);
/**
* free_netdev - free network device
* @dev: device
*
* This function does the last stage of destroying an allocated device
* interface. The reference to the device object is released.
* If this is the last reference then it will be freed.
*/
void free_netdev(struct net_device *dev)
{
#ifdef CONFIG_SYSFS
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED) {
kfree((char *)dev - dev->padded);
return;
}
BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
dev->reg_state = NETREG_RELEASED;

Greg Kroah-Hartman
committed
/* will free via device release */
put_device(&dev->dev);
#else
kfree((char *)dev - dev->padded);
#endif
}
synchronize_rcu();
}
/**
* unregister_netdevice - remove device from the kernel
* @dev: device
*
* This function shuts down a device interface and removes it
* from the kernel tables. On success 0 is returned, on a failure
* a negative errno code is returned.
*
* Callers must hold the rtnl semaphore. You may want
* unregister_netdev() instead of this.
*/
void unregister_netdevice(struct net_device *dev)
{
BUG_ON(dev_boot_phase);
ASSERT_RTNL();
/* Some devices call without registering for initialization unwind. */
if (dev->reg_state == NETREG_UNINITIALIZED) {
printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
"was registered\n", dev->name, dev);
}
BUG_ON(dev->reg_state != NETREG_REGISTERED);
/* If device is running, close it first. */
if (dev->flags & IFF_UP)
dev_close(dev);
/* And unlink it from device chain. */
write_lock_bh(&dev_base_lock);
list_del(&dev->dev_list);
hlist_del(&dev->name_hlist);
hlist_del(&dev->index_hlist);
write_unlock_bh(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERING;
synchronize_net();
/* Shutdown queueing discipline. */
dev_shutdown(dev);
/* Notify protocols, that we are about to destroy
this device. They should clean all the things.
*/
raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
* Flush the unicast and multicast chains
dev_unicast_discard(dev);
dev_mc_discard(dev);
if (dev->uninit)
dev->uninit(dev);
/* Notifier chain MUST detach us from master device. */
BUG_TRAP(!dev->master);
/* Remove entries from sysfs */
netdev_unregister_sysfs(dev);
3766
3767
3768
3769
3770
3771
3772
3773
3774
3775
3776
3777
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
3794
3795
3796
3797
3798
3799
3800
3801
3802
3803
3804
/* Finish processing unregister after unlock */
net_set_todo(dev);
synchronize_net();
dev_put(dev);
}
/**
* unregister_netdev - remove device from the kernel
* @dev: device
*
* This function shuts down a device interface and removes it
* from the kernel tables. On success 0 is returned, on a failure
* a negative errno code is returned.
*
* This is just a wrapper for unregister_netdevice that takes
* the rtnl semaphore. In general you want to use this and not
* unregister_netdevice.
*/
void unregister_netdev(struct net_device *dev)
{
rtnl_lock();
unregister_netdevice(dev);
rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
static int dev_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *ocpu)
{
struct sk_buff **list_skb;
struct net_device **list_net;
struct sk_buff *skb;
unsigned int cpu, oldcpu = (unsigned long)ocpu;
struct softnet_data *sd, *oldsd;
if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3806
3807
3808
3809
3810
3811
3812
3813
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
3834
3835
3836
3837
3838
return NOTIFY_OK;
local_irq_disable();
cpu = smp_processor_id();
sd = &per_cpu(softnet_data, cpu);
oldsd = &per_cpu(softnet_data, oldcpu);
/* Find end of our completion_queue. */
list_skb = &sd->completion_queue;
while (*list_skb)
list_skb = &(*list_skb)->next;
/* Append completion queue from offline CPU. */
*list_skb = oldsd->completion_queue;
oldsd->completion_queue = NULL;
/* Find end of our output_queue. */
list_net = &sd->output_queue;
while (*list_net)
list_net = &(*list_net)->next_sched;
/* Append output queue from offline CPU. */
*list_net = oldsd->output_queue;
oldsd->output_queue = NULL;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
netif_rx(skb);
return NOTIFY_OK;
}
#ifdef CONFIG_NET_DMA
/**
* net_dma_rebalance -
* This is called when the number of channels allocated to the net_dma_client
* changes. The net_dma_client tries to have one DMA channel per CPU.
*/
static void net_dma_rebalance(struct net_dma *net_dma)
unsigned int cpu, i, n, chan_idx;
struct dma_chan *chan;
if (cpus_empty(net_dma->channel_mask)) {
for_each_online_cpu(cpu)
rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
return;
}
i = 0;
cpu = first_cpu(cpu_online_map);
for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
chan = net_dma->channels[chan_idx];
n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
+ (i < (num_online_cpus() %
cpus_weight(net_dma->channel_mask)) ? 1 : 0));
per_cpu(softnet_data, cpu).net_dma = chan;
cpu = next_cpu(cpu, cpu_online_map);
n--;
}
i++;
}
}
/**
* netdev_dma_event - event callback for the net_dma_client
* @client: should always be net_dma_client
* @chan: DMA channel for the event
* @event: event type
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
enum dma_state state)
{
int i, found = 0, pos = -1;
struct net_dma *net_dma =
container_of(client, struct net_dma, client);
enum dma_state_client ack = DMA_DUP; /* default: take no action */
spin_lock(&net_dma->lock);
switch (state) {
case DMA_RESOURCE_AVAILABLE:
for (i = 0; i < NR_CPUS; i++)
if (net_dma->channels[i] == chan) {
found = 1;
break;
} else if (net_dma->channels[i] == NULL && pos < 0)
pos = i;
if (!found && pos >= 0) {
ack = DMA_ACK;
net_dma->channels[pos] = chan;
cpu_set(pos, net_dma->channel_mask);
net_dma_rebalance(net_dma);
}
break;
case DMA_RESOURCE_REMOVED:
for (i = 0; i < NR_CPUS; i++)
if (net_dma->channels[i] == chan) {
found = 1;
pos = i;
break;
}
if (found) {
ack = DMA_ACK;
cpu_clear(pos, net_dma->channel_mask);
net_dma->channels[i] = NULL;
net_dma_rebalance(net_dma);
}
break;
default:
break;
}
spin_unlock(&net_dma->lock);
return ack;
}
/**
* netdev_dma_regiser - register the networking subsystem as a DMA client
*/
static int __init netdev_dma_register(void)
{
spin_lock_init(&net_dma.lock);
dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
dma_async_client_register(&net_dma.client);
dma_async_client_chan_request(&net_dma.client);
return 0;
}
#else
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
* present) and leaves us with a valid list of present and active devices.
*
*/
/*
* This is called single threaded during boot, so no need
* to take the rtnl semaphore.
*/
static int __init net_dev_init(void)
{
int i, rc = -ENOMEM;
BUG_ON(!dev_boot_phase);
if (dev_proc_init())
goto out;
if (netdev_sysfs_init())
goto out;
INIT_LIST_HEAD(&ptype_all);
INIT_LIST_HEAD(&ptype_base[i]);
for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
INIT_HLIST_HEAD(&dev_name_head[i]);
for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
INIT_HLIST_HEAD(&dev_index_head[i]);
/*
* Initialise the packet receive queues.
*/
for_each_possible_cpu(i) {
struct softnet_data *queue;
queue = &per_cpu(softnet_data, i);
skb_queue_head_init(&queue->input_pkt_queue);
queue->completion_queue = NULL;
INIT_LIST_HEAD(&queue->poll_list);
set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
queue->backlog_dev.weight = weight_p;
queue->backlog_dev.poll = process_backlog;
atomic_set(&queue->backlog_dev.refcnt, 1);
}
netdev_dma_register();