/* net/core/dev.c (excerpt) */

int netif_rx(struct sk_buff *skb)
{
	int ret;

	if (netdev_tstamp_prequeue)
		net_timestamp_check(skb);

	trace_netif_rx(skb);
#ifdef CONFIG_RPS
	{
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu;

		preempt_disable();
		rcu_read_lock();

		cpu = get_rps_cpu(skb->dev, skb, &rflow);
		if (cpu < 0)
			cpu = smp_processor_id();

		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

		rcu_read_unlock();
		preempt_enable();
	}
#else
	{
		unsigned int qtail;
		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
		put_cpu();
	}
#endif
	return ret;
}
EXPORT_SYMBOL(netif_rx);
    
    
    int netif_rx_ni(struct sk_buff *skb)
    {
    	int err;
    
    	preempt_disable();
    	err = netif_rx(skb);
    	if (local_softirq_pending())
    		do_softirq();
    	preempt_enable();
    
    	return err;
    }
    EXPORT_SYMBOL(netif_rx_ni);
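
/*
 * Usage sketch (an illustration, not part of this file): netif_rx() is
 * safe to call from hard interrupt context, while netif_rx_ni() is the
 * variant for process context, since it also kicks any softirq raised
 * by the enqueue. The helper below is hypothetical.
 */
static void example_deliver(struct net_device *dev, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, dev);
	if (in_interrupt())
		netif_rx(skb);		/* interrupt or softirq context */
	else
		netif_rx_ni(skb);	/* process context */
}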
    
    static void net_tx_action(struct softirq_action *h)
    {
    	struct softnet_data *sd = &__get_cpu_var(softnet_data);
    
    	if (sd->completion_queue) {
    		struct sk_buff *clist;
    
    		local_irq_disable();
    		clist = sd->completion_queue;
    		sd->completion_queue = NULL;
    		local_irq_enable();
    
    		while (clist) {
    			struct sk_buff *skb = clist;
    			clist = clist->next;
    
    
    			WARN_ON(atomic_read(&skb->users));
    
    			trace_kfree_skb(skb, net_tx_action);
    
    			__kfree_skb(skb);
    		}
    	}
    
    	if (sd->output_queue) {
    
		struct Qdisc *head;

    		local_irq_disable();
    		head = sd->output_queue;
    		sd->output_queue = NULL;
    
    		sd->output_queue_tailp = &sd->output_queue;
    
    		local_irq_enable();
    
    		while (head) {
    
    			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
    			if (spin_trylock(root_lock)) {
    
    				smp_mb__before_clear_bit();
    				clear_bit(__QDISC_STATE_SCHED,
    					  &q->state);
    
    				qdisc_run(q);
    				spin_unlock(root_lock);
    
    			} else {
    
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}

    #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
        (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
    
    /* This hook is defined here for ATM LANE */
    int (*br_fdb_test_addr_hook)(struct net_device *dev,
    			     unsigned char *addr) __read_mostly;
    
    EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
    
    
    #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? Otherwise we currently pay for some
 * useless instructions (a compare and two extra stores) when
 * CONFIG_NET_CLS_ACT is enabled but sch_ingress is not.
 *
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
    
static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
{
    	struct net_device *dev = skb->dev;
    
    	u32 ttl = G_TC_RTTL(skb->tc_verd);
    
    	int result = TC_ACT_OK;
    	struct Qdisc *q;
    
    	if (unlikely(MAX_RED_LOOP < ttl++)) {
    		if (net_ratelimit())
			pr_warning("Redir loop detected Dropping packet (%d->%d)\n",
			       skb->skb_iif, dev->ifindex);
		return TC_ACT_SHOT;
	}
    
    	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
    	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
    
    	q = rxq->qdisc;
    
    	if (q != &noop_qdisc) {
    
    		spin_lock(qdisc_lock(q));
    
    		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
    			result = qdisc_enqueue_root(skb, q);
    
    		spin_unlock(qdisc_lock(q));
	}

	return result;
}

    static inline struct sk_buff *handle_ing(struct sk_buff *skb,
    					 struct packet_type **pt_prev,
    					 int *ret, struct net_device *orig_dev)
    {
    
    	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
    
	if (!rxq || rxq->qdisc == &noop_qdisc)
		goto out;

    	if (*pt_prev) {
    		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

    	switch (ing_filter(skb, rxq)) {
    
    	case TC_ACT_SHOT:
    	case TC_ACT_STOLEN:
    		kfree_skb(skb);
    		return NULL;
    	}
    
    out:
    	skb->tc_verd = 0;
    	return skb;
    
    }
    #endif
    
    
    /**
     *	netdev_rx_handler_register - register receive handler
     *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
     *
 *	Register a receive handler for a device. This handler will then be
     *	called from __netif_receive_skb. A negative errno code is returned
     *	on a failure.
     *
 *	The caller must hold the rtnl_mutex.
 *
 *	For a general description of rx_handler, see enum rx_handler_result.
     */
int netdev_rx_handler_register(struct net_device *dev,
			       rx_handler_func_t *rx_handler,
			       void *rx_handler_data)
    {
    	ASSERT_RTNL();
    
    	if (dev->rx_handler)
		return -EBUSY;

	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
	rcu_assign_pointer(dev->rx_handler, rx_handler);
    
    	return 0;
    }
    EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
    
    /**
     *	netdev_rx_handler_unregister - unregister receive handler
     *	@dev: device to unregister a handler from
     *
 *	Unregister a receive handler from a device.
     *
     *	The caller must hold the rtnl_mutex.
     */
    void netdev_rx_handler_unregister(struct net_device *dev)
    {
    
	ASSERT_RTNL();

	RCU_INIT_POINTER(dev->rx_handler, NULL);
	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
    }
    EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
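
/*
 * Usage sketch (an illustration, not part of this file): how a module in
 * the style of bridge or macvlan might claim a device's receive path.
 * "example_rx_handler", "example_attach" and "struct example_port" are
 * hypothetical names.
 */
static rx_handler_result_t example_rx_handler(struct sk_buff **pskb)
{
	struct sk_buff *skb = *pskb;
	struct example_port *port = rcu_dereference(skb->dev->rx_handler_data);

	/* ... inspect, redirect or consume the skb here ... */
	(void)port;
	return RX_HANDLER_PASS;	/* let normal delivery continue */
}

static int example_attach(struct net_device *dev, struct example_port *port)
{
	int err;

	rtnl_lock();	/* netdev_rx_handler_register() asserts RTNL */
	err = netdev_rx_handler_register(dev, example_rx_handler, port);
	rtnl_unlock();
	return err;
}

static void example_detach(struct net_device *dev)
{
	rtnl_lock();
	netdev_rx_handler_unregister(dev);
	rtnl_unlock();
}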
    
    
static int __netif_receive_skb(struct sk_buff *skb)
    {
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;
    
    	if (!netdev_tstamp_prequeue)
    		net_timestamp_check(skb);
    
    	trace_netif_receive_skb(skb);
    
	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		return NET_RX_DROP;
    
    
    	if (!skb->skb_iif)
    		skb->skb_iif = skb->dev->ifindex;
    
    	orig_dev = skb->dev;
    
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);
    
    
    	pt_prev = NULL;
    
    	rcu_read_lock();
    
    
    another_round:
    
    	__this_cpu_inc(softnet_data.processed);
    
    
    	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
    		skb = vlan_untag(skb);
    		if (unlikely(!skb))
    			goto out;
    	}
    
    
    #ifdef CONFIG_NET_CLS_ACT
    	if (skb->tc_verd & TC_NCLS) {
    		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
    		goto ncls;
    	}
    #endif
    
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
    			pt_prev = ptype;
    		}
    	}
    
    #ifdef CONFIG_NET_CLS_ACT
    
    	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
    		goto out;
    ncls:
    #endif
    
    
    	rx_handler = rcu_dereference(skb->dev->rx_handler);
    
    	if (vlan_tx_tag_present(skb)) {
    		if (pt_prev) {
    			ret = deliver_skb(skb, pt_prev, orig_dev);
    			pt_prev = NULL;
    		}
    
    		if (vlan_do_receive(&skb, !rx_handler))
    
    			goto another_round;
    		else if (unlikely(!skb))
    			goto out;
    	}
    
    
    	if (rx_handler) {
    		if (pt_prev) {
    			ret = deliver_skb(skb, pt_prev, orig_dev);
    			pt_prev = NULL;
    		}
    
    		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			goto out;
		case RX_HANDLER_ANOTHER:
			goto another_round;
    		case RX_HANDLER_EXACT:
    			deliver_exact = true;
    		case RX_HANDLER_PASS:
    			break;
    		default:
    			BUG();
		}
	}
    
    	/* deliver only exact match when indicated */
    
    	null_or_dev = deliver_exact ? skb->dev : NULL;
    
    	type = skb->protocol;
    
    	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
    			pt_prev = ptype;
    		}
    	}
    
    	if (pt_prev) {
    
    		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    
    	} else {
    
    		atomic_long_inc(&skb->dev->rx_dropped);
    
    		kfree_skb(skb);
    		/* Jamal, now you will not able to escape explaining
    		 * me how you were going to use this. :-)
    		 */
    		ret = NET_RX_DROP;
    	}
    
    out:
    	rcu_read_unlock();
    	return ret;
    }
    
    
    /**
     *	netif_receive_skb - process receive buffer from network
     *	@skb: buffer to process
     *
     *	netif_receive_skb() is the main receive data processing function.
     *	It always succeeds. The buffer may be dropped during processing
     *	for congestion control or by the protocol layers.
     *
     *	This function may only be called from softirq context and interrupts
     *	should be enabled.
     *
     *	Return values (usually ignored):
     *	NET_RX_SUCCESS: no congestion
     *	NET_RX_DROP: packet was dropped
     */
    int netif_receive_skb(struct sk_buff *skb)
    {
    
    	if (netdev_tstamp_prequeue)
    		net_timestamp_check(skb);
    
    
    	if (skb_defer_rx_timestamp(skb))
    		return NET_RX_SUCCESS;
    
    
    #ifdef CONFIG_RPS
    
    	{
    		struct rps_dev_flow voidflow, *rflow = &voidflow;
    		int cpu, ret;
    
    		rcu_read_lock();
    
    		cpu = get_rps_cpu(skb->dev, skb, &rflow);
    
    		if (cpu >= 0) {
    			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
    			rcu_read_unlock();
    		} else {
    			rcu_read_unlock();
    			ret = __netif_receive_skb(skb);
    		}
    
		return ret;
	}
    #else
    	return __netif_receive_skb(skb);
#endif
}
    EXPORT_SYMBOL(netif_receive_skb);
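
/*
 * Usage sketch (an illustration, not part of this file): the typical
 * driver-side call, made from a NAPI poll routine (softirq context)
 * once the link-layer header has been parsed. Names are hypothetical.
 */
static void example_rx_one(struct net_device *dev, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, dev);
	netif_receive_skb(skb);	/* return value is usually ignored */
}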
    
    
    
    /* Network device is going away, flush any packets still pending
     * Called with irqs disabled.
     */
    
static void flush_backlog(void *arg)
{
	struct net_device *dev = arg;
    
    	struct softnet_data *sd = &__get_cpu_var(softnet_data);
    
    	struct sk_buff *skb, *tmp;
    
    
    	rps_lock(sd);
    
    	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
    
    		if (skb->dev == dev) {
    
			__skb_unlink(skb, &sd->input_pkt_queue);
			kfree_skb(skb);
			input_queue_head_incr(sd);
		}
	}
	rps_unlock(sd);
    
    
    	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
    		if (skb->dev == dev) {
    			__skb_unlink(skb, &sd->process_queue);
			kfree_skb(skb);
			input_queue_head_incr(sd);
		}
	}
}

    static int napi_gro_complete(struct sk_buff *skb)
    {
    	struct packet_type *ptype;
    	__be16 type = skb->protocol;
    	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
    	int err = -ENOENT;
    
    
    	if (NAPI_GRO_CB(skb)->count == 1) {
		skb_shinfo(skb)->gso_size = 0;
		goto out;
	}

    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, head, list) {
    		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
    			continue;
    
    		err = ptype->gro_complete(skb);
    		break;
    	}
    	rcu_read_unlock();
    
    	if (err) {
    		WARN_ON(&ptype->list == head);
    		kfree_skb(skb);
    		return NET_RX_SUCCESS;
    	}
    
    out:
    	return netif_receive_skb(skb);
    }
    
    
    inline void napi_gro_flush(struct napi_struct *napi)
    
    {
    	struct sk_buff *skb, *next;
    
    	for (skb = napi->gro_list; skb; skb = next) {
    		next = skb->next;
    		skb->next = NULL;
    		napi_gro_complete(skb);
    	}
    
    
	napi->gro_count = 0;
	napi->gro_list = NULL;
    }
    
    EXPORT_SYMBOL(napi_gro_flush);
    
    enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    
    {
    	struct sk_buff **pp = NULL;
    	struct packet_type *ptype;
    	__be16 type = skb->protocol;
    	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
    
	int same_flow;
	int mac_len;
    	enum gro_result ret;
    
	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
		goto normal;

	if (skb_is_gso(skb) || skb_has_frag_list(skb))
		goto normal;

    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, head, list) {
    		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
    			continue;
    
    
    		skb_set_network_header(skb, skb_gro_offset(skb));
    
    		mac_len = skb->network_header - skb->mac_header;
    		skb->mac_len = mac_len;
    		NAPI_GRO_CB(skb)->same_flow = 0;
    		NAPI_GRO_CB(skb)->flush = 0;
    
    		NAPI_GRO_CB(skb)->free = 0;
    
    
    		pp = ptype->gro_receive(&napi->gro_list, skb);
    		break;
    	}
    	rcu_read_unlock();
    
    	if (&ptype->list == head)
    		goto normal;
    
    
    	same_flow = NAPI_GRO_CB(skb)->same_flow;
    
    	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
    
    	if (pp) {
    		struct sk_buff *nskb = *pp;
    
    		*pp = nskb->next;
    		nskb->next = NULL;
		napi_gro_complete(nskb);
		napi->gro_count--;
	}

	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
		goto normal;

	napi->gro_count++;
    	NAPI_GRO_CB(skb)->count = 1;
    
    	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
    
    	skb->next = napi->gro_list;
    	napi->gro_list = skb;
    
	ret = GRO_HELD;

pull:
    	if (skb_headlen(skb) < skb_gro_offset(skb)) {
    		int grow = skb_gro_offset(skb) - skb_headlen(skb);
    
    		BUG_ON(skb->end - skb->tail < grow);
    
    		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
    
    		skb->tail += grow;
    		skb->data_len -= grow;
    
    		skb_shinfo(skb)->frags[0].page_offset += grow;
    
    		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
    
    		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
    
    			skb_frag_unref(skb, 0);
    
    			memmove(skb_shinfo(skb)->frags,
    				skb_shinfo(skb)->frags + 1,
    
				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
		}
	}

ok:
	return ret;

normal:
	ret = GRO_NORMAL;
	goto pull;
}

    EXPORT_SYMBOL(dev_gro_receive);
    
    
static inline gro_result_t
__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    {
    	struct sk_buff *p;
    
    	for (p = napi->gro_list; p; p = p->next) {
    
    		unsigned long diffs;
    
    		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
    
    		diffs |= p->vlan_tci ^ skb->vlan_tci;
    
    		diffs |= compare_ether_header(skb_mac_header(p),
    
    					      skb_gro_mac_header(skb));
    
    		NAPI_GRO_CB(p)->same_flow = !diffs;
    
    		NAPI_GRO_CB(p)->flush = 0;
    	}
    
    	return dev_gro_receive(napi, skb);
    }
    
gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
    	switch (ret) {
    	case GRO_NORMAL:
    
    		if (netif_receive_skb(skb))
    			ret = GRO_DROP;
    		break;
    
    	case GRO_DROP:
    	case GRO_MERGED_FREE:
    
    		kfree_skb(skb);
    		break;
    
    
    	case GRO_HELD:
    	case GRO_MERGED:
		break;
	}

	return ret;
    }
    EXPORT_SYMBOL(napi_skb_finish);
    
    
    void skb_gro_reset_offset(struct sk_buff *skb)
    {
    	NAPI_GRO_CB(skb)->data_offset = 0;
    	NAPI_GRO_CB(skb)->frag0 = NULL;
    
    	NAPI_GRO_CB(skb)->frag0_len = 0;
    
	if (skb->mac_header == skb->tail &&
	    !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
		NAPI_GRO_CB(skb)->frag0 =
			skb_frag_address(&skb_shinfo(skb)->frags[0]);
		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
	}
    }
    EXPORT_SYMBOL(skb_gro_reset_offset);
    
    
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
    	skb_gro_reset_offset(skb);
    
    
    	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
    
    }
    EXPORT_SYMBOL(napi_gro_receive);
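
/*
 * Usage sketch (an illustration, not part of this file): a GRO-aware
 * driver hands each skb to napi_gro_receive() instead of calling
 * netif_receive_skb() directly, letting dev_gro_receive() above merge
 * same-flow segments before they enter the stack. Names are hypothetical.
 */
static void example_gro_rx(struct napi_struct *napi, struct net_device *dev,
			   struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, dev);
	napi_gro_receive(napi, skb);
}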
    
    
    static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
    
    {
    	__skb_pull(skb, skb_headlen(skb));
    	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
    
    	skb->dev = napi->dev;
    
	skb->skb_iif = 0;

	napi->skb = skb;
}

struct sk_buff *napi_get_frags(struct napi_struct *napi)
    {
    	struct sk_buff *skb = napi->skb;
    
    	if (!skb) {
    
    		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
    		if (skb)
			napi->skb = skb;
	}
	return skb;
}
    EXPORT_SYMBOL(napi_get_frags);
    
    gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
			       gro_result_t ret)
{
    	switch (ret) {
	case GRO_NORMAL:
	case GRO_HELD:
    		skb->protocol = eth_type_trans(skb, skb->dev);
    
    		if (ret == GRO_HELD)
    			skb_gro_pull(skb, -ETH_HLEN);
    		else if (netif_receive_skb(skb))
			ret = GRO_DROP;
		break;

    	case GRO_DROP:
    	case GRO_MERGED_FREE:
    		napi_reuse_skb(napi, skb);
		break;

	case GRO_MERGED:
		break;
	}

	return ret;
}
    EXPORT_SYMBOL(napi_frags_finish);
    
    
    struct sk_buff *napi_frags_skb(struct napi_struct *napi)
    {
    	struct sk_buff *skb = napi->skb;
    	struct ethhdr *eth;
    
    	unsigned int hlen;
    	unsigned int off;
    
    
    	napi->skb = NULL;
    
    	skb_reset_mac_header(skb);
    	skb_gro_reset_offset(skb);
    
    
    	off = skb_gro_offset(skb);
    	hlen = off + sizeof(*eth);
    	eth = skb_gro_header_fast(skb, off);
    	if (skb_gro_header_hard(skb, hlen)) {
    		eth = skb_gro_header_slow(skb, hlen, off);
    		if (unlikely(!eth)) {
    			napi_reuse_skb(napi, skb);
    			skb = NULL;
    			goto out;
    		}
    
    	}
    
    	skb_gro_pull(skb, sizeof(*eth));
    
    	/*
    	 * This works because the only protocols we care about don't require
    	 * special handling.  We'll fix it up properly at the end.
    	 */
    	skb->protocol = eth->h_proto;
    
    out:
    	return skb;
    }
    EXPORT_SYMBOL(napi_frags_skb);
    
    
gro_result_t napi_gro_frags(struct napi_struct *napi)
{
	struct sk_buff *skb = napi_frags_skb(napi);

	if (!skb)
		return GRO_DROP;

    	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
    }
    
    EXPORT_SYMBOL(napi_gro_frags);
    
    
    /*
     * net_rps_action sends any pending IPI's for rps.
     * Note: called with local irq disabled, but exits with local irq enabled.
     */
    static void net_rps_action_and_irq_enable(struct softnet_data *sd)
    {
    #ifdef CONFIG_RPS
    	struct softnet_data *remsd = sd->rps_ipi_list;
    
    	if (remsd) {
    		sd->rps_ipi_list = NULL;
    
    		local_irq_enable();
    
    		/* Send pending IPI's to kick RPS processing on remote cpus. */
    		while (remsd) {
    			struct softnet_data *next = remsd->rps_ipi_next;
    
    			if (cpu_online(remsd->cpu))
    				__smp_call_function_single(remsd->cpu,
    							   &remsd->csd, 0);
    			remsd = next;
    		}
    	} else
    #endif
    		local_irq_enable();
    }
    
    
static int process_backlog(struct napi_struct *napi, int quota)
    {
    	int work = 0;
    
    	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
    
    #ifdef CONFIG_RPS
    	/* Check if we have pending ipi, its better to send them now,
    	 * not waiting net_rx_action() end.
    	 */
    	if (sd->rps_ipi_list) {
    		local_irq_disable();
    		net_rps_action_and_irq_enable(sd);
    	}
#endif
	napi->weight = weight_p;
    	local_irq_disable();
    	while (work < quota) {
    
    		struct sk_buff *skb;
    
    		unsigned int qlen;
    
    		while ((skb = __skb_dequeue(&sd->process_queue))) {
    			local_irq_enable();
    			__netif_receive_skb(skb);
    			local_irq_disable();
    
    			input_queue_head_incr(sd);
    			if (++work >= quota) {
    				local_irq_enable();
    				return work;
			}
		}

    		rps_lock(sd);
    
		qlen = skb_queue_len(&sd->input_pkt_queue);
		if (qlen)
    			skb_queue_splice_tail_init(&sd->input_pkt_queue,
    						   &sd->process_queue);
    
    		if (qlen < quota - work) {
    
    Eric Dumazet's avatar
    Eric Dumazet committed
    			/*
    			 * Inline a custom version of __napi_complete().
    			 * only current cpu owns and manipulates this napi,
    			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
    			 * we can use a plain write instead of clear_bit(),
    			 * and we dont need an smp_mb() memory barrier.
    			 */
    			list_del(&napi->poll_list);
			napi->state = 0;

			quota = work + qlen;
		}
    		rps_unlock(sd);
    
	}
	local_irq_enable();

	return work;
}

    
    /**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
     *
     * The entry's receive function will be scheduled to run
     */
    
void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	local_irq_save(flags);
	____napi_schedule(&__get_cpu_var(softnet_data), n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);

    void __napi_complete(struct napi_struct *n)
    {
    	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
    	BUG_ON(n->gro_list);
    
    	list_del(&n->poll_list);
    	smp_mb__before_clear_bit();
    	clear_bit(NAPI_STATE_SCHED, &n->state);
    }
    EXPORT_SYMBOL(__napi_complete);
    
    void napi_complete(struct napi_struct *n)
    {
    	unsigned long flags;
    
    	/*
    	 * don't let napi dequeue from the cpu poll list
    	 * just in case its running on a different cpu
    	 */
    	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
    		return;
    
    	napi_gro_flush(n);
    	local_irq_save(flags);
    	__napi_complete(n);
    	local_irq_restore(flags);
    }
    EXPORT_SYMBOL(napi_complete);
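
/*
 * Usage sketch (an illustration, not part of this file): the canonical
 * NAPI pattern that ends up in __napi_schedule() above. A hypothetical
 * driver's interrupt handler masks its RX interrupt and schedules the
 * poll routine; the poll routine completes NAPI once it runs out of
 * work. "struct example_priv" and the example_* helpers are assumed,
 * and the instance would be registered with netif_napi_add() below,
 * e.g. netif_napi_add(netdev, &priv->napi, example_poll, 64).
 */
static irqreturn_t example_isr(int irq, void *data)
{
	struct example_priv *priv = data;

	example_disable_rx_irq(priv);	/* hypothetical helper */
	napi_schedule(&priv->napi);	/* ends up raising NET_RX_SOFTIRQ */
	return IRQ_HANDLED;
}

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	int work_done = example_clean_rx(priv, budget);	/* hypothetical */

	if (work_done < budget) {
		napi_complete(napi);
		example_enable_rx_irq(priv);	/* hypothetical helper */
	}
	return work_done;
}
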
    
    void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
    		    int (*poll)(struct napi_struct *, int), int weight)
    {
    	INIT_LIST_HEAD(&napi->poll_list);
    
	napi->gro_count = 0;
	napi->gro_list = NULL;
	napi->skb = NULL;
    	napi->poll = poll;
    	napi->weight = weight;
    	list_add(&napi->dev_list, &dev->napi_list);
    	napi->dev = dev;
    
    #ifdef CONFIG_NETPOLL
    
    	spin_lock_init(&napi->poll_lock);
    	napi->poll_owner = -1;
    #endif
    	set_bit(NAPI_STATE_SCHED, &napi->state);
    }
    EXPORT_SYMBOL(netif_napi_add);
    
    void netif_napi_del(struct napi_struct *napi)
    {
    	struct sk_buff *skb, *next;
    
    
    	list_del_init(&napi->dev_list);
    
    	napi_free_frags(napi);
    
    
    	for (skb = napi->gro_list; skb; skb = next) {
    		next = skb->next;
    		skb->next = NULL;
    		kfree_skb(skb);
    	}
    
	napi->gro_list = NULL;
	napi->gro_count = 0;
    
    }
    EXPORT_SYMBOL(netif_napi_del);
    
    
    static void net_rx_action(struct softirq_action *h)
    {
    
	struct softnet_data *sd = &__get_cpu_var(softnet_data);
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

    	local_irq_disable();
    
    
    	while (!list_empty(&sd->poll_list)) {
    
    		struct napi_struct *n;
    		int work, weight;
    
		/* If softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which allows an
		 * average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
    			goto softnet_break;
    
    		local_irq_enable();
    
    
    		/* Even though interrupts have been re-enabled, this
    		 * access is safe because interrupts can only add new
    		 * entries to the tail of this list, and only ->poll()
    		 * calls can remove this head entry from the list.
    		 */
    
		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);
		weight = n->weight;

    		/* This NAPI_STATE_SCHED test is for avoiding a race
    		 * with netpoll's poll_napi().  Only the entity which
    		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
    		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
    
			work = n->poll(n, weight);
			trace_napi_poll(n);
		}

    		WARN_ON_ONCE(work > weight);
    
    		budget -= work;
    
    		local_irq_disable();
    
    		/* Drivers must not modify the NAPI state if they
    		 * consume the entire weight.  In such cases this code
    		 * still "owns" the NAPI instance and therefore can
    		 * move the instance around on the list at-will.
    		 */
    
    		if (unlikely(work == weight)) {
    
    			if (unlikely(napi_disable_pending(n))) {
    				local_irq_enable();
    				napi_complete(n);
    				local_irq_disable();
    			} else
    
				list_move_tail(&n->poll_list, &sd->poll_list);
		}

		netpoll_poll_unlock(have);
    	}
    out:
    
    	net_rps_action_and_irq_enable(sd);
    
    #ifdef CONFIG_NET_DMA
    	/*
    	 * There may not be any more sk_buffs coming right now, so push
    	 * any pending DMA copies to hardware
    	 */
    
	dma_issue_pending_all();
#endif

    	return;
    
    softnet_break:
    
    	sd->time_squeeze++;
    
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    	goto out;
    }
    
    
    static gifconf_func_t *gifconf_list[NPROTO];
    
    
    /**
     *	register_gifconf	-	register a SIOCGIF handler
     *	@family: Address family
     *	@gifconf: Function handler
     *
     *	Register protocol dependent address dumping routines. The handler
     *	that is passed must not be freed or reused until it has been replaced
     *	by another handler.
     */
    
int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
    {
    	if (family >= NPROTO)
    		return -EINVAL;
    	gifconf_list[family] = gifconf;
    	return 0;
    }
    
    EXPORT_SYMBOL(register_gifconf);
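
/*
 * Usage sketch (an illustration, not part of this file): an address
 * family registers its SIOCGIFCONF dumper once at init time. AF_INET,
 * for example, does effectively this from net/ipv4/devinet.c:
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 */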