Newer
Older

Patrick McHardy
committed
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
nskb->next = skb->next;
skb->next = nskb;
return rc;
}
if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
out_kfree_gso_skb:
if (likely(skb->next == NULL))
skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
kfree_skb(skb);
return rc;
u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
if (skb_rx_queue_recorded(skb)) {
hash = skb_get_rx_queue(skb);
while (unlikely(hash >= dev->real_num_tx_queues))
hash -= dev->real_num_tx_queues;
return hash;
}
if (skb->sk && skb->sk->sk_hash)
hash = (__force u16) skb->protocol ^ skb->rxhash;
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
EXPORT_SYMBOL(skb_tx_hash);
static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
if (unlikely(queue_index >= dev->real_num_tx_queues)) {
if (net_ratelimit()) {
pr_warning("%s selects TX queue %d, but "
"real number of TX queues is %d\n",
dev->name, queue_index, dev->real_num_tx_queues);
}
return 0;
}
return queue_index;
}
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
{
struct sock *sk = skb->sk;
queue_index = sk_tx_queue_get(sk);
if (queue_index < 0) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue) {
queue_index = ops->ndo_select_queue(dev, skb);
queue_index = dev_cap_txqueue(dev, queue_index);
} else {
queue_index = 0;
if (dev->real_num_tx_queues > 1)
queue_index = skb_tx_hash(dev, skb);

David S. Miller
committed
struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
if (dst && skb_dst(skb) == dst)
sk_tx_queue_set(sk, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct net_device *dev,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
bool contended = qdisc_is_running(q);
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
* This permits __QDISC_STATE_RUNNING owner to get the lock more often
* and dequeue packets faster.
*/
if (unlikely(contended))
spin_lock(&q->busylock);
spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
kfree_skb(skb);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) {
/*
* This is a work-conserving queue; there are no old skbs
* waiting to be sent out; and the qdisc is not running -
* xmit the skb directly.
*/
if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
skb_dst_force(skb);
__qdisc_update_bstats(q, skb->len);
if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
contended = false;
}
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = qdisc_enqueue_root(skb, q);
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
contended = false;
}
__qdisc_run(q);
}
}
spin_unlock(root_lock);
if (unlikely(contended))
spin_unlock(&q->busylock);
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
*
* Queue a buffer for transmission to a network device. The caller must
* have set the device and priority and built the buffer before calling
* this function. The function can be called from an interrupt.
*
* A negative errno code is returned on a failure. A success does not
* guarantee the frame will be transmitted as it may be dropped due
* to congestion or traffic shaping.
*
* -----------------------------------------------------------------------------------
* I notice this method can also return errors from the queue disciplines,
* including NET_XMIT_DROP, which is a positive value. So, errors can also
* be positive.
*
* Regardless of the return value, the skb is consumed, so it is currently
* difficult to retry a send to this method. (You can bump the ref count
* before sending to hold a reference for retry if you are careful.)
*
* When calling this method, interrupts MUST be enabled. This is because
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
*/
int dev_queue_xmit(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
txq = dev_pick_tx(dev, skb);
q = rcu_dereference_bh(txq->qdisc);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
rc = __dev_xmit_skb(skb, q, dev, txq);
}
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
Really, it is unlikely that netif_tx_lock protection is necessary
here. (f.e. loopback and IP tunnels are clean ignoring statistics
counters.)
However, it is possible, that they rely on protection
made by us here.
Check this and shot the lock. It is not prone from deadlocks.
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
if (txq->xmit_lock_owner != cpu) {
HARD_TX_LOCK(dev, txq, cpu);
if (!netif_tx_queue_stopped(txq)) {
rc = dev_hard_start_xmit(skb, dev, txq);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
HARD_TX_UNLOCK(dev, txq);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
} else {
/* Recursion is detected! It is possible,
* unfortunately */
if (net_ratelimit())
printk(KERN_CRIT "Dead loop on virtual device "
"%s, fix it urgently!\n", dev->name);
}
}
rc = -ENETDOWN;
/*=======================================================================
Receiver routines
=======================================================================*/
int netdev_max_backlog __read_mostly = 1000;
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64; /* old backlog weight */
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
struct napi_struct *napi)
{
list_add_tail(&napi->poll_list, &sd->poll_list);
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
* __skb_get_rxhash: calculate a flow hash based on src/dst addresses
* and src/dst port numbers. Returns a non-zero hash number on success
* and 0 on failure.
__u32 __skb_get_rxhash(struct sk_buff *skb)
int nhoff, hash = 0, poff;
struct ipv6hdr *ip6;
struct iphdr *ip;
u8 ip_proto;
u32 addr1, addr2, ihl;
union {
u32 v32;
u16 v16[2];
} ports;
nhoff = skb_network_offset(skb);
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
ip = (struct iphdr *) (skb->data + nhoff);
if (ip->frag_off & htons(IP_MF | IP_OFFSET))
ip_proto = 0;
else
ip_proto = ip->protocol;
addr1 = (__force u32) ip->saddr;
addr2 = (__force u32) ip->daddr;
ihl = ip->ihl;
break;
case __constant_htons(ETH_P_IPV6):
if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
ip6 = (struct ipv6hdr *) (skb->data + nhoff);
addr1 = (__force u32) ip6->saddr.s6_addr32[3];
addr2 = (__force u32) ip6->daddr.s6_addr32[3];
ihl = (40 >> 2);
break;
default:
goto done;
}
ports.v32 = 0;
poff = proto_ports_offset(ip_proto);
if (poff >= 0) {
nhoff += ihl * 4 + poff;
if (pskb_may_pull(skb, nhoff + 4)) {
ports.v32 = * (__force u32 *) (skb->data + nhoff);
if (ports.v16[1] < ports.v16[0])
swap(ports.v16[0], ports.v16[1]);
/* get a consistent hash (same value on both flow directions) */
if (addr2 < addr1)
swap(addr1, addr2);
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
if (!hash)
hash = 1;
done:
return hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);
#ifdef CONFIG_RPS
/* One global table that all flow-based protocols share. */
struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);
/*
* get_rps_cpu is called from netif_receive_skb and returns the target
* CPU from the RPS map of the receiving queue for a given skb.
* rcu_read_lock must be held on entry.
*/
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
struct rps_dev_flow **rflowp)
{
struct netdev_rx_queue *rxqueue;
struct rps_map *map;
struct rps_dev_flow_table *flow_table;
struct rps_sock_flow_table *sock_flow_table;
int cpu = -1;
u16 tcpu;
if (skb_rx_queue_recorded(skb)) {
u16 index = skb_get_rx_queue(skb);
if (unlikely(index >= dev->num_rx_queues)) {
WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
"on queue %u, but number of RX queues is %u\n",
dev->name, index, dev->num_rx_queues);
goto done;
}
rxqueue = dev->_rx + index;
} else
rxqueue = dev->_rx;
if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
goto done;
skb_reset_network_header(skb);
if (!skb_get_rxhash(skb))
goto done;
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
flow_table = rcu_dereference(rxqueue->rps_flow_table);
sock_flow_table = rcu_dereference(rps_sock_flow_table);
if (flow_table && sock_flow_table) {
u16 next_cpu;
struct rps_dev_flow *rflow;
rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
tcpu = rflow->cpu;
next_cpu = sock_flow_table->ents[skb->rxhash &
sock_flow_table->mask];
/*
* If the desired CPU (where last recvmsg was done) is
* different from current CPU (one in the rx-queue flow
* table entry), switch if one of the following holds:
* - Current CPU is unset (equal to RPS_NO_CPU).
* - Current CPU is offline.
* - The current CPU's queue tail has advanced beyond the
* last packet that was enqueued using this table entry.
* This guarantees that all previous packets for the flow
* have been dequeued, thus preserving in order delivery.
*/
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
rflow->last_qtail)) >= 0)) {
tcpu = rflow->cpu = next_cpu;
if (tcpu != RPS_NO_CPU)
rflow->last_qtail = per_cpu(softnet_data,
tcpu).input_queue_head;
}
if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
cpu = tcpu;
goto done;
}
}
map = rcu_dereference(rxqueue->rps_map);
if (map) {
tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
if (cpu_online(tcpu)) {
cpu = tcpu;
goto done;
}
}
done:
return cpu;
}
/* Called from hardirq (IPI) context */
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
/*
* Check if this softnet_data structure is another cpu one
* If yes, queue it to our IPI list and return 1
* If no, return 0
*/
static int rps_ipi_queued(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
struct softnet_data *mysd = &__get_cpu_var(softnet_data);
if (sd != mysd) {
sd->rps_ipi_next = mysd->rps_ipi_list;
mysd->rps_ipi_list = sd;
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
return 1;
}
#endif /* CONFIG_RPS */
return 0;
}
/*
* enqueue_to_backlog is called to queue an skb to a per CPU backlog
* queue (may be a remote CPU queue).
*/
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
unsigned int *qtail)
if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
if (skb_queue_len(&sd->input_pkt_queue)) {
input_queue_tail_incr_save(sd, qtail);
/* Schedule NAPI for backlog device
* We can use non atomic operation since we own the queue lock
*/
if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
local_irq_restore(flags);
kfree_skb(skb);
return NET_RX_DROP;
}
/**
* netif_rx - post buffer to the network code
* @skb: buffer to post
*
* This function receives a packet from a device driver and queues it for
* the upper (protocol) levels to process. It always succeeds. The buffer
* may be dropped during processing for congestion control or by the
* protocol layers.
*
* return values:
* NET_RX_SUCCESS (no congestion)
* NET_RX_DROP (packet was dropped)
*
*/
int netif_rx(struct sk_buff *skb)
{
/* if netpoll wants it, pretend we never saw it */
if (netpoll_rx(skb))
return NET_RX_DROP;
if (netdev_tstamp_prequeue)
net_timestamp_check(skb);
struct rps_dev_flow voidflow, *rflow = &voidflow;
preempt_disable();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (cpu < 0)
cpu = smp_processor_id();
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
{
unsigned int qtail;
ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
put_cpu();
}
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
int netif_rx_ni(struct sk_buff *skb)
{
int err;
preempt_disable();
err = netif_rx(skb);
if (local_softirq_pending())
do_softirq();
preempt_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
if (sd->completion_queue) {
struct sk_buff *clist;
local_irq_disable();
clist = sd->completion_queue;
sd->completion_queue = NULL;
local_irq_enable();
while (clist) {
struct sk_buff *skb = clist;
clist = clist->next;
WARN_ON(atomic_read(&skb->users));
__kfree_skb(skb);
}
}
if (sd->output_queue) {
struct Qdisc *head;
local_irq_disable();
head = sd->output_queue;
sd->output_queue = NULL;
sd->output_queue_tailp = &sd->output_queue;
struct Qdisc *q = head;
spinlock_t *root_lock;
root_lock = qdisc_lock(q);
if (spin_trylock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
qdisc_run(q);
spin_unlock(root_lock);
if (!test_bit(__QDISC_STATE_DEACTIVATED,
__netif_reschedule(q);
} else {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
}
static inline int deliver_skb(struct sk_buff *skb,
struct packet_type *pt_prev,
struct net_device *orig_dev)
return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
(defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
unsigned char *addr) __read_mostly;
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
* when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
* a compare and 2 stores extra right now if we dont have it on
* but have CONFIG_NET_CLS_ACT
* NOTE: This doesnt stop any functionality; if you dont have
* the ingress scheduler, you just cant add policies on ingress.
*
*/
static int ing_filter(struct sk_buff *skb)
u32 ttl = G_TC_RTTL(skb->tc_verd);
struct netdev_queue *rxq;
int result = TC_ACT_OK;
struct Qdisc *q;
if (unlikely(MAX_RED_LOOP < ttl++)) {
if (net_ratelimit())
pr_warning( "Redir loop detected Dropping packet (%d->%d)\n",
skb->skb_iif, dev->ifindex);
return TC_ACT_SHOT;
}
skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
rxq = &dev->rx_queue;
if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
result = qdisc_enqueue_root(skb, q);
spin_unlock(qdisc_lock(q));
}
return result;
}
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
if (skb->dev->rx_queue.qdisc == &noop_qdisc)
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
switch (ing_filter(skb)) {
case TC_ACT_SHOT:
case TC_ACT_STOLEN:
kfree_skb(skb);
return NULL;
}
out:
skb->tc_verd = 0;
return skb;
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
/*
* netif_nit_deliver - deliver received packets to network taps
* @skb: buffer
*
* This function is used to deliver incoming packets to network
* taps. It should be used when the normal netif_receive_skb path
* is bypassed, for example because of VLAN acceleration.
*/
void netif_nit_deliver(struct sk_buff *skb)
{
struct packet_type *ptype;
if (list_empty(&ptype_all))
return;
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
skb->mac_len = skb->network_header - skb->mac_header;
rcu_read_lock();
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (!ptype->dev || ptype->dev == skb->dev)
deliver_skb(skb, ptype, skb->dev);
}
rcu_read_unlock();
}
/**
* netdev_rx_handler_register - register receive handler
* @dev: device to register a handler for
* @rx_handler: receive handler to register
* @rx_handler_data: data pointer that is used by rx handler
*
* Register a receive hander for a device. This handler will then be
* called from __netif_receive_skb. A negative errno code is returned
* on a failure.
*
* The caller must hold the rtnl_mutex.
*/
int netdev_rx_handler_register(struct net_device *dev,
rx_handler_func_t *rx_handler,
void *rx_handler_data)
{
ASSERT_RTNL();
if (dev->rx_handler)
return -EBUSY;
rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
rcu_assign_pointer(dev->rx_handler, rx_handler);
return 0;
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
/**
* netdev_rx_handler_unregister - unregister receive handler
* @dev: device to unregister a handler from
*
* Unregister a receive hander from a device.
*
* The caller must hold the rtnl_mutex.
*/
void netdev_rx_handler_unregister(struct net_device *dev)
{
ASSERT_RTNL();
rcu_assign_pointer(dev->rx_handler, NULL);
rcu_assign_pointer(dev->rx_handler_data, NULL);
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
struct net_device *master)
{
if (skb->pkt_type == PACKET_HOST) {
u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
memcpy(dest, master->dev_addr, ETH_ALEN);
}
}
/* On bonding slaves other than the currently active slave, suppress
* duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
* ARP on active-backup slaves with arp_validate enabled.
*/
int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
{
struct net_device *dev = skb->dev;
if (master->priv_flags & IFF_MASTER_ARPMON)
dev->last_rx = jiffies;
if ((master->priv_flags & IFF_MASTER_ALB) &&
(master->priv_flags & IFF_BRIDGE_PORT)) {
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
/* Do address unmangle. The local destination address
* will be always the one master has. Provides the right
* functionality in a bridge.
*/
skb_bond_set_mac_by_master(skb, master);
}
if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
skb->protocol == __cpu_to_be16(ETH_P_ARP))
return 0;
if (master->priv_flags & IFF_MASTER_ALB) {
if (skb->pkt_type != PACKET_BROADCAST &&
skb->pkt_type != PACKET_MULTICAST)
return 0;
}
if (master->priv_flags & IFF_MASTER_8023AD &&
skb->protocol == __cpu_to_be16(ETH_P_SLOW))
return 0;
return 1;
}
return 0;
}
EXPORT_SYMBOL(__skb_bond_should_drop);
static int __netif_receive_skb(struct sk_buff *skb)
rx_handler_func_t *rx_handler;
struct net_device *null_or_orig;
struct net_device *orig_or_bond;
if (!netdev_tstamp_prequeue)
net_timestamp_check(skb);
if (vlan_tx_tag_present(skb))
vlan_hwaccel_do_receive(skb);
/* if we've gotten here through NAPI, check netpoll */
if (netpoll_receive_skb(skb))
if (!skb->skb_iif)
skb->skb_iif = skb->dev->ifindex;
/*
* bonding note: skbs received on inactive slaves should only
* be delivered to pkt handlers that are exact matches. Also
* the deliver_no_wcard flag will be set. If packet handlers
* are sensitive to duplicate packets these skbs will need to
* be dropped at the handler. The vlan accel path may have
* already set the deliver_no_wcard flag.
*/
master = ACCESS_ONCE(orig_dev->master);
if (skb->deliver_no_wcard)
null_or_orig = orig_dev;
else if (master) {
if (skb_bond_should_drop(skb, master)) {
skb->deliver_no_wcard = 1;
null_or_orig = orig_dev; /* deliver only exact match */
__this_cpu_inc(softnet_data.processed);
skb_reset_network_header(skb);
skb_reset_transport_header(skb);

Arnaldo Carvalho de Melo
committed
skb->mac_len = skb->network_header - skb->mac_header;
pt_prev = NULL;
rcu_read_lock();
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
goto ncls;
}
#endif
list_for_each_entry_rcu(ptype, &ptype_all, list) {
if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
ptype->dev == orig_dev) {
pt_prev = ptype;
}
}
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
/* Handle special case of bridge or macvlan */
rx_handler = rcu_dereference(skb->dev->rx_handler);
if (rx_handler) {
if (pt_prev) {
ret = deliver_skb(skb, pt_prev, orig_dev);
pt_prev = NULL;
}
skb = rx_handler(skb);
if (!skb)
goto out;
}
/*
* Make sure frames received on VLAN interfaces stacked on
* bonding interfaces still make their way to any base bonding
* device that may have registered for a specific ptype. The
* handler may have to adjust skb->dev and orig_dev.
*/
orig_or_bond = orig_dev;
if ((skb->dev->priv_flags & IFF_802_1Q_VLAN) &&
(vlan_dev_real_dev(skb->dev)->priv_flags & IFF_BONDING)) {
orig_or_bond = vlan_dev_real_dev(skb->dev);
}
list_for_each_entry_rcu(ptype,
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
if (ptype->type == type && (ptype->dev == null_or_orig ||
ptype->dev == skb->dev || ptype->dev == orig_dev ||
ptype->dev == orig_or_bond)) {
pt_prev = ptype;
}
}
if (pt_prev) {
ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
} else {
kfree_skb(skb);
/* Jamal, now you will not able to escape explaining
* me how you were going to use this. :-)
*/
ret = NET_RX_DROP;
}
out:
rcu_read_unlock();
return ret;
}
/**
* netif_receive_skb - process receive buffer from network
* @skb: buffer to process
*
* netif_receive_skb() is the main receive data processing function.
* It always succeeds. The buffer may be dropped during processing
* for congestion control or by the protocol layers.
*
* This function may only be called from softirq context and interrupts
* should be enabled.
*
* Return values (usually ignored):
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
int netif_receive_skb(struct sk_buff *skb)
{
if (netdev_tstamp_prequeue)
net_timestamp_check(skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
{
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu, ret;
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
} else {
rcu_read_unlock();
ret = __netif_receive_skb(skb);
}