Newer
Older
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
* @v: pointer passed unmodified to notifier function
*
* Call all network notifier blocks. Parameters and return value
* are as for raw_notifier_call_chain().
*/
int call_netdevice_notifiers(unsigned long val, void *v)
{
return raw_notifier_call_chain(&netdev_chain, val, v);
}
/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);
void net_enable_timestamp(void)
{
atomic_inc(&netstamp_needed);
}
void net_disable_timestamp(void)
{
atomic_dec(&netstamp_needed);
}
void __net_timestamp(struct sk_buff *skb)
{
struct timeval tv;
do_gettimeofday(&tv);
skb_set_timestamp(skb, &tv);
}
EXPORT_SYMBOL(__net_timestamp);
static inline void net_timestamp(struct sk_buff *skb)
__net_timestamp(skb);
skb->tstamp.off_sec = 0;
skb->tstamp.off_usec = 0;
}
}
/*
* Support routine. Sends outgoing frames to any network
* taps currently in use.
*/
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
net_timestamp(skb);
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
/* Never send packets back to the socket
* they originated from - MvS (miquels@drinkel.ow.org)
*/
if ((ptype->dev == dev || !ptype->dev) &&
(ptype->af_packet_priv == NULL ||
(struct sock *)ptype->af_packet_priv != skb->sk)) {
struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
if (!skb2)
break;
/* skb->nh should be correctly
set by sender, so that the second statement is
just protection against buggy protocols.
*/
skb2->mac.raw = skb2->data;
if (skb2->nh.raw < skb2->data ||
skb2->nh.raw > skb2->tail) {
if (net_ratelimit())
printk(KERN_CRIT "protocol %04x is "
"buggy, dev %s\n",
skb2->protocol, dev->name);
skb2->nh.raw = skb2->data;
}
skb2->h.raw = skb2->nh.raw;
skb2->pkt_type = PACKET_OUTGOING;
ptype->func(skb2, skb->dev, ptype, skb->dev);
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
void __netif_schedule(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
unsigned long flags;
struct softnet_data *sd;
local_irq_save(flags);
sd = &__get_cpu_var(softnet_data);
dev->next_sched = sd->output_queue;
sd->output_queue = dev;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
}
}
EXPORT_SYMBOL(__netif_schedule);
void __netif_rx_schedule(struct net_device *dev)
{
unsigned long flags;
local_irq_save(flags);
dev_hold(dev);
list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
dev->quota = dev->weight;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
local_irq_restore(flags);
}
EXPORT_SYMBOL(__netif_rx_schedule);
void dev_kfree_skb_any(struct sk_buff *skb)
{
if (in_irq() || irqs_disabled())
dev_kfree_skb_irq(skb);
else
dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
/* Hot-plugging. */
void netif_device_detach(struct net_device *dev)
{
if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
netif_running(dev)) {
netif_stop_queue(dev);
}
}
EXPORT_SYMBOL(netif_device_detach);
void netif_device_attach(struct net_device *dev)
{
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
netif_running(dev)) {
netif_wake_queue(dev);
__netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(netif_device_attach);
/*
* Invalidate hardware checksum when packet is to be mangled, and
* complete checksum manually on outgoing path.
*/
int skb_checksum_help(struct sk_buff *skb, int inward)
{
unsigned int csum;
int ret = 0, offset = skb->h.raw - skb->data;
if (inward) {
skb->ip_summed = CHECKSUM_NONE;
goto out;
}
if (skb_cloned(skb)) {
ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
if (ret)
goto out;
}
BUG_ON(offset > (int)skb->len);
csum = skb_checksum(skb, offset, skb->len-offset, 0);
offset = skb->tail - skb->h.raw;
BUG_ON(offset <= 0);
BUG_ON(skb->csum + 2 > offset);
*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
skb->ip_summed = CHECKSUM_NONE;
out:
return ret;
}
/**
* skb_gso_segment - Perform segmentation on skb.
* @skb: buffer to segment
* @features: features for the output path (see dev->features)
*
* This function segments the given skb and returns a list of segments.
*
* It may return NULL if the skb requires no segmentation. This is
* only possible when GSO is used for verifying header integrity.
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
struct packet_type *ptype;
int type = skb->protocol;
BUG_ON(skb_shinfo(skb)->frag_list);
BUG_ON(skb->ip_summed != CHECKSUM_HW);
skb->mac.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->data;
__skb_pull(skb, skb->mac_len);
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
segs = ptype->gso_segment(skb, features);
break;
}
}
rcu_read_unlock();
__skb_push(skb, skb->data - skb->mac.raw);
return segs;
}
EXPORT_SYMBOL(skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
if (net_ratelimit()) {
printk(KERN_ERR "%s: hw csum failure.\n",
dev ? dev->name : "<unknown>");
dump_stack();
}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
* 1. IOMMU is present and allows to map all the memory.
* 2. No high memory really exists on this machine.
*/
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
int i;
if (dev->features & NETIF_F_HIGHDMA)
return 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
if (PageHighMem(skb_shinfo(skb)->frags[i].page))
return 1;
return 0;
}
#else
#define illegal_highdma(dev, skb) (0)
#endif
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
struct dev_gso_cb {
void (*destructor)(struct sk_buff *skb);
};
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
static void dev_gso_skb_destructor(struct sk_buff *skb)
{
struct dev_gso_cb *cb;
do {
struct sk_buff *nskb = skb->next;
skb->next = nskb->next;
nskb->next = NULL;
kfree_skb(nskb);
} while (skb->next);
cb = DEV_GSO_CB(skb);
if (cb->destructor)
cb->destructor(skb);
}
/**
* dev_gso_segment - Perform emulated hardware segmentation on skb.
* @skb: buffer to segment
*
* This function segments the given skb and stores the list of segments
* in skb->next.
*/
static int dev_gso_segment(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct sk_buff *segs;
int features = dev->features & ~(illegal_highdma(dev, skb) ?
NETIF_F_SG : 0);
segs = skb_gso_segment(skb, features);
/* Verifying header integrity only. */
if (!segs)
return 0;
if (unlikely(IS_ERR(segs)))
return PTR_ERR(segs);
skb->next = segs;
DEV_GSO_CB(skb)->destructor = skb->destructor;
skb->destructor = dev_gso_skb_destructor;
return 0;
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
if (likely(!skb->next)) {
if (netdev_nit)
dev_queue_xmit_nit(skb, dev);
if (netif_needs_gso(dev, skb)) {
if (unlikely(dev_gso_segment(skb)))
goto out_kfree_skb;
if (skb->next)
goto gso;
}
return dev->hard_start_xmit(skb, dev);
do {
struct sk_buff *nskb = skb->next;
int rc;
skb->next = nskb->next;
nskb->next = NULL;
rc = dev->hard_start_xmit(nskb, dev);
if (unlikely(rc)) {
nskb->next = skb->next;
skb->next = nskb;
return rc;
}
if (unlikely(netif_queue_stopped(dev) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
kfree_skb(skb);
return 0;
}
#define HARD_TX_LOCK(dev, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
} \
}
#define HARD_TX_UNLOCK(dev) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
} \
}
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
*
* Queue a buffer for transmission to a network device. The caller must
* have set the device and priority and built the buffer before calling
* this function. The function can be called from an interrupt.
*
* A negative errno code is returned on a failure. A success does not
* guarantee the frame will be transmitted as it may be dropped due
* to congestion or traffic shaping.
*
* -----------------------------------------------------------------------------------
* I notice this method can also return errors from the queue disciplines,
* including NET_XMIT_DROP, which is a positive value. So, errors can also
* be positive.
*
* Regardless of the return value, the skb is consumed, so it is currently
* difficult to retry a send to this method. (You can bump the ref count
* before sending to hold a reference for retry if you are careful.)
*
* When calling this method, interrupts MUST be enabled. This is because
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
*/
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct Qdisc *q;
int rc = -ENOMEM;
/* GSO will handle the following emulations directly. */
if (netif_needs_gso(dev, skb))
goto gso;
if (skb_shinfo(skb)->frag_list &&
!(dev->features & NETIF_F_FRAGLIST) &&
goto out_kfree_skb;
/* Fragmented skb is linearized if device does not support SG,
* or if at least one of fragments is in highmem and device
* does not support DMA from it.
*/
if (skb_shinfo(skb)->nr_frags &&
(!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
goto out_kfree_skb;
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
if (skb->ip_summed == CHECKSUM_HW &&
(!(dev->features & NETIF_F_GEN_CSUM) &&
(!(dev->features & NETIF_F_IP_CSUM) ||
skb->protocol != htons(ETH_P_IP))))
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;
spin_lock_prefetch(&dev->queue_lock);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
* rcu structure - it may be accessed without acquiring
* a lock (but the structure may be stale.) The freeing of the
* qdisc will be deferred until it's known that there are no
* more references to it.
*
* If the qdisc has an enqueue function, we still need to
* hold the queue_lock before calling it, since queue_lock
* also serializes access to the device queue.
*/
q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
#endif
if (q->enqueue) {
/* Grab device queue */
spin_lock(&dev->queue_lock);
rc = q->enqueue(skb, q);
qdisc_run(dev);
spin_unlock(&dev->queue_lock);
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
Really, it is unlikely that netif_tx_lock protection is necessary
here. (f.e. loopback and IP tunnels are clean ignoring statistics
counters.)
However, it is possible, that they rely on protection
made by us here.
Check this and shot the lock. It is not prone from deadlocks.
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
if (dev->xmit_lock_owner != cpu) {
HARD_TX_LOCK(dev, cpu);
if (!netif_queue_stopped(dev)) {
rc = 0;
if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
goto out;
}
}
HARD_TX_UNLOCK(dev);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
} else {
/* Recursion is detected! It is possible,
* unfortunately */
if (net_ratelimit())
printk(KERN_CRIT "Dead loop on virtual device "
"%s, fix it urgently!\n", dev->name);
}
}
rc = -ENETDOWN;
out_kfree_skb:
kfree_skb(skb);
return rc;
out:
return rc;
}
/*=======================================================================
Receiver routines
=======================================================================*/
int netdev_max_backlog = 1000;
int netdev_budget = 300;
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
int weight_p = 64; /* old backlog weight */
DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_CN_LOW (low congestion)
* NET_RX_CN_MOD (moderate congestion)
* NET_RX_CN_HIGH (high congestion)
* NET_RX_DROP (packet was dropped)
*
*/
int netif_rx(struct sk_buff *skb)
{
struct softnet_data *queue;
unsigned long flags;
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
return NET_RX_DROP;
if (!skb->tstamp.off_sec)
net_timestamp(skb);
/*
* The code is rearranged so that the path is the most
* short when CPU is congested, but is still operating.
*/
local_irq_save(flags);
queue = &__get_cpu_var(softnet_data);
__get_cpu_var(netdev_rx_stat).total++;
if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
if (queue->input_pkt_queue.qlen) {
enqueue:
dev_hold(skb->dev);
__skb_queue_tail(&queue->input_pkt_queue, skb);
local_irq_restore(flags);
return NET_RX_SUCCESS;
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
}
netif_rx_schedule(&queue->backlog_dev);
goto enqueue;
}
__get_cpu_var(netdev_rx_stat).dropped++;
local_irq_restore(flags);
kfree_skb(skb);
return NET_RX_DROP;
}
int netif_rx_ni(struct sk_buff *skb)
{
int err;
preempt_disable();
err = netif_rx(skb);
if (local_softirq_pending())
do_softirq();
preempt_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
static inline struct net_device *skb_bond(struct sk_buff *skb)
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
if (dev->master) {
/*
* On bonding slaves other than the currently active
* slave, suppress duplicates except for 802.3ad
* ETH_P_SLOW and alb non-mcast/bcast.
*/
if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
if (dev->master->priv_flags & IFF_MASTER_ALB) {
if (skb->pkt_type != PACKET_BROADCAST &&
skb->pkt_type != PACKET_MULTICAST)
goto keep;
}
if (dev->master->priv_flags & IFF_MASTER_8023AD &&
skb->protocol == __constant_htons(ETH_P_SLOW))
goto keep;
kfree_skb(skb);
return NULL;
}
keep:
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
}
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
local_irq_disable();
clist = sd->completion_queue;
sd->completion_queue = NULL;
local_irq_enable();
while (clist) {
struct sk_buff *skb = clist;
clist = clist->next;
BUG_TRAP(!atomic_read(&skb->users));
__kfree_skb(skb);
}
}
if (sd->output_queue) {
struct net_device *head;
local_irq_disable();
head = sd->output_queue;
sd->output_queue = NULL;
local_irq_enable();
while (head) {
struct net_device *dev = head;
head = head->next_sched;
smp_mb__before_clear_bit();
clear_bit(__LINK_STATE_SCHED, &dev->state);
if (spin_trylock(&dev->queue_lock)) {
qdisc_run(dev);
spin_unlock(&dev->queue_lock);
} else {
netif_schedule(dev);
}
}
}
}
static __inline__ int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
static __inline__ int handle_bridge(struct sk_buff **pskb,
struct packet_type **pt_prev, int *ret,
struct net_device *orig_dev)
{
struct net_bridge_port *port;
if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
(port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
return 0;
if (*pt_prev) {
*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
#endif
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
* when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
* a compare and 2 stores extra right now if we dont have it on
* but have CONFIG_NET_CLS_ACT
* NOTE: This doesnt stop any functionality; if you dont have
* the ingress scheduler, you just cant add policies on ingress.
*
*/
static int ing_filter(struct sk_buff *skb)
{
struct Qdisc *q;
struct net_device *dev = skb->dev;
int result = TC_ACT_OK;
if (dev->qdisc_ingress) {
__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
if (MAX_RED_LOOP < ttl++) {
printk("Redir loop detected Dropping packet (%s->%s)\n",
skb->input_dev->name, skb->dev->name);
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
spin_lock(&dev->ingress_lock);
if ((q = dev->qdisc_ingress) != NULL)
result = q->enqueue(skb, q);
spin_unlock(&dev->ingress_lock);
}
return result;
}
#endif
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
int ret = NET_RX_DROP;
unsigned short type;
/* if we've gotten here through NAPI, check netpoll */
if (skb->dev->poll && netpoll_rx(skb))
return NET_RX_DROP;
if (!skb->tstamp.off_sec)
net_timestamp(skb);
if (!skb->input_dev)
skb->input_dev = skb->dev;
if (!orig_dev)
return NET_RX_DROP;
__get_cpu_var(netdev_rx_stat).total++;
skb->h.raw = skb->nh.raw = skb->data;
skb->mac_len = skb->nh.raw - skb->mac.raw;
pt_prev = NULL;
rcu_read_lock();
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev) {
if (pt_prev)
pt_prev = ptype;
}
}
#ifdef CONFIG_NET_CLS_ACT
if (pt_prev) {
pt_prev = NULL; /* noone else should process this after*/
} else {
skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
}
ret = ing_filter(skb);
if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kfree_skb(skb);
goto out;
}
skb->tc_verd = 0;
ncls:
#endif
handle_diverter(skb);
if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
goto out;
type = skb->protocol;
list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
if (ptype->type == type &&
(!ptype->dev || ptype->dev == skb->dev)) {
if (pt_prev)
pt_prev = ptype;
}
}
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}
out:
rcu_read_unlock();
return ret;
}
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
int work = 0;
int quota = min(backlog_dev->quota, *budget);
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
backlog_dev->weight = weight_p;
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
for (;;) {
struct sk_buff *skb;
struct net_device *dev;
local_irq_disable();
skb = __skb_dequeue(&queue->input_pkt_queue);
if (!skb)
goto job_done;
local_irq_enable();
dev = skb->dev;
netif_receive_skb(skb);
dev_put(dev);
work++;
if (work >= quota || jiffies - start_time > 1)
break;
}
backlog_dev->quota -= work;
*budget -= work;
return -1;
job_done:
backlog_dev->quota -= work;
*budget -= work;
list_del(&backlog_dev->poll_list);
smp_mb__before_clear_bit();
netif_poll_enable(backlog_dev);
local_irq_enable();
return 0;
}
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *queue = &__get_cpu_var(softnet_data);
unsigned long start_time = jiffies;
int budget = netdev_budget;
local_irq_disable();
while (!list_empty(&queue->poll_list)) {
struct net_device *dev;
if (budget <= 0 || jiffies - start_time > 1)
goto softnet_break;
local_irq_enable();
dev = list_entry(queue->poll_list.next,
struct net_device, poll_list);
if (dev->quota <= 0 || dev->poll(dev, &budget)) {
list_move_tail(&dev->poll_list, &queue->poll_list);
if (dev->quota < 0)
dev->quota += dev->weight;
else
dev->quota = dev->weight;
} else {
dev_put(dev);
local_irq_disable();
}
}
out:
#ifdef CONFIG_NET_DMA
/*
* There may not be any more sk_buffs coming right now, so push
* any pending DMA copies to hardware
*/
if (net_dma_client) {
struct dma_chan *chan;
rcu_read_lock();
list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
dma_async_memcpy_issue_pending(chan);
rcu_read_unlock();
}
#endif
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
local_irq_enable();
return;
softnet_break:
__get_cpu_var(netdev_rx_stat).time_squeeze++;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
goto out;
}
static gifconf_func_t * gifconf_list [NPROTO];
/**
* register_gifconf - register a SIOCGIF handler
* @family: Address family
* @gifconf: Function handler
*
* Register protocol dependent address dumping routines. The handler
* that is passed must not be freed or reused until it has been replaced
* by another handler.
*/
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
if (family >= NPROTO)
return -EINVAL;
gifconf_list[family] = gifconf;
return 0;
}
/*
* Map an interface index to its name (SIOCGIFNAME)
*/
/*
* We need this ioctl for efficient implementation of the
* if_indextoname() function required by the IPv6 API. Without
* it, we would have to search all the interfaces to find a
* match. --pb
*/
static int dev_ifname(struct ifreq __user *arg)
{
struct net_device *dev;
struct ifreq ifr;
/*
* Fetch the caller's info block.
*/
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
read_lock(&dev_base_lock);
dev = __dev_get_by_index(ifr.ifr_ifindex);
if (!dev) {
read_unlock(&dev_base_lock);
return -ENODEV;