Skip to content
Snippets Groups Projects
dev.c 128 KiB
Newer Older
  • Learn to ignore specific revisions
  • Linus Torvalds's avatar
    Linus Torvalds committed
    			clist = clist->next;
    
    
    			WARN_ON(atomic_read(&skb->users));
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			__kfree_skb(skb);
    		}
    	}
    
    	if (sd->output_queue) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    		local_irq_disable();
    		head = sd->output_queue;
    		sd->output_queue = NULL;
    		local_irq_enable();
    
    		while (head) {
    
    			struct Qdisc *q = head;
    			spinlock_t *root_lock;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			head = head->next_sched;
    
    
    			if (spin_trylock(root_lock)) {
    
    				smp_mb__before_clear_bit();
    				clear_bit(__QDISC_STATE_SCHED,
    					  &q->state);
    
    				qdisc_run(q);
    				spin_unlock(root_lock);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			} else {
    
    				if (!test_bit(__QDISC_STATE_DEACTIVATED,
    
    					__netif_reschedule(q);
    
    				} else {
    					smp_mb__before_clear_bit();
    					clear_bit(__QDISC_STATE_SCHED,
    						  &q->state);
    				}
    
    static inline int deliver_skb(struct sk_buff *skb,
    			      struct packet_type *pt_prev,
    			      struct net_device *orig_dev)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	atomic_inc(&skb->users);
    
    	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
    
    /* These hooks defined here for ATM */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    struct net_bridge;
    struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
    						unsigned char *addr);
    
    void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /*
     * If bridge module is loaded call bridging hook.
     *  returns NULL if packet was consumed.
     */
    struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
    					struct sk_buff *skb) __read_mostly;
    static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
    					    struct packet_type **pt_prev, int *ret,
    					    struct net_device *orig_dev)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct net_bridge_port *port;
    
    
    	if (skb->pkt_type == PACKET_LOOPBACK ||
    	    (port = rcu_dereference(skb->dev->br_port)) == NULL)
    		return skb;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	if (*pt_prev) {
    
    		*ret = deliver_skb(skb, *pt_prev, orig_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		*pt_prev = NULL;
    
    	return br_handle_frame_hook(port, skb);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    #else
    
    #define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #endif
    
    
    #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
    struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
    EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
    
    static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
    					     struct packet_type **pt_prev,
    					     int *ret,
    					     struct net_device *orig_dev)
    {
    	if (skb->dev->macvlan_port == NULL)
    		return skb;
    
    	if (*pt_prev) {
    		*ret = deliver_skb(skb, *pt_prev, orig_dev);
    		*pt_prev = NULL;
    	}
    	return macvlan_handle_frame_hook(skb);
    }
    #else
    #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
    #endif
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is?  Otherwise we add some useless
 * instructions (a compare and 2 stores) if we don't have it on
 * but do have CONFIG_NET_CLS_ACT.
 *
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
    
    static int ing_filter(struct sk_buff *skb)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct net_device *dev = skb->dev;
    
    	u32 ttl = G_TC_RTTL(skb->tc_verd);
    
    	struct netdev_queue *rxq;
    	int result = TC_ACT_OK;
    	struct Qdisc *q;
    
    	if (MAX_RED_LOOP < ttl++) {
    		printk(KERN_WARNING
    		       "Redir loop detected Dropping packet (%d->%d)\n",
    		       skb->iif, dev->ifindex);
    		return TC_ACT_SHOT;
    	}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
    	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	q = rxq->qdisc;
    
    	if (q != &noop_qdisc) {
    
    		spin_lock(qdisc_lock(q));
    
    		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
    			result = qdisc_enqueue_root(skb, q);
    
    		spin_unlock(qdisc_lock(q));
    	}
    
    static inline struct sk_buff *handle_ing(struct sk_buff *skb,
    					 struct packet_type **pt_prev,
    					 int *ret, struct net_device *orig_dev)
    {
    
    	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (*pt_prev) {
    		*ret = deliver_skb(skb, *pt_prev, orig_dev);
    		*pt_prev = NULL;
    	} else {
    		/* Huh? Why does turning on AF_PACKET affect this? */
    		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
    
    	switch (ing_filter(skb)) {
    	case TC_ACT_SHOT:
    	case TC_ACT_STOLEN:
    		kfree_skb(skb);
    		return NULL;
    	}
    
    out:
    	skb->tc_verd = 0;
    	return skb;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    #endif
    
    
    /*
     * 	netif_nit_deliver - deliver received packets to network taps
     * 	@skb: buffer
     *
     * 	This function is used to deliver incoming packets to network
     * 	taps. It should be used when the normal netif_receive_skb path
     * 	is bypassed, for example because of VLAN acceleration.
     */
    void netif_nit_deliver(struct sk_buff *skb)
    {
    	struct packet_type *ptype;
    
    	if (list_empty(&ptype_all))
    		return;
    
    	skb_reset_network_header(skb);
    	skb_reset_transport_header(skb);
    	skb->mac_len = skb->network_header - skb->mac_header;
    
    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    		if (!ptype->dev || ptype->dev == skb->dev)
    			deliver_skb(skb, ptype, skb->dev);
    	}
    	rcu_read_unlock();
    }
    
    
    /**
     *	netif_receive_skb - process receive buffer from network
     *	@skb: buffer to process
     *
     *	netif_receive_skb() is the main receive data processing function.
     *	It always succeeds. The buffer may be dropped during processing
     *	for congestion control or by the protocol layers.
     *
     *	This function may only be called from softirq context and interrupts
     *	should be enabled.
     *
     *	Return values (usually ignored):
     *	NET_RX_SUCCESS: no congestion
     *	NET_RX_DROP: packet was dropped
     */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    int netif_receive_skb(struct sk_buff *skb)
    {
    	struct packet_type *ptype, *pt_prev;
    
    	struct net_device *orig_dev;
    
    	struct net_device *null_or_orig;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	int ret = NET_RX_DROP;
    
    Al Viro's avatar
    Al Viro committed
    	__be16 type;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
    		return NET_RX_SUCCESS;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	/* if we've gotten here through NAPI, check netpoll */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		return NET_RX_DROP;
    
    
    	if (!skb->tstamp.tv64)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	if (!skb->iif)
    		skb->iif = skb->dev->ifindex;
    
    	null_or_orig = NULL;
    
    	orig_dev = skb->dev;
    	if (orig_dev->master) {
    
    		if (skb_bond_should_drop(skb))
    			null_or_orig = orig_dev; /* deliver only exact match */
    		else
    			skb->dev = orig_dev->master;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	__get_cpu_var(netdev_rx_stat).total++;
    
    
    	skb_reset_network_header(skb);
    
    	skb_reset_transport_header(skb);
    
    	skb->mac_len = skb->network_header - skb->mac_header;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	pt_prev = NULL;
    
    	rcu_read_lock();
    
    #ifdef CONFIG_NET_CLS_ACT
    	if (skb->tc_verd & TC_NCLS) {
    		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
    		goto ncls;
    	}
    #endif
    
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    
    		if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
    		    ptype->dev == orig_dev) {
    
    				ret = deliver_skb(skb, pt_prev, orig_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			pt_prev = ptype;
    		}
    	}
    
    #ifdef CONFIG_NET_CLS_ACT
    
    	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
    	if (!skb)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto out;
    ncls:
    #endif
    
    
    	skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
    
    	if (!skb)
    		goto out;
    	skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		goto out;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	type = skb->protocol;
    
    	list_for_each_entry_rcu(ptype,
    			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		if (ptype->type == type &&
    
    		    (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
    		     ptype->dev == orig_dev)) {
    
    				ret = deliver_skb(skb, pt_prev, orig_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			pt_prev = ptype;
    		}
    	}
    
    	if (pt_prev) {
    
    		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	} else {
    		kfree_skb(skb);
    		/* Jamal, now you will not able to escape explaining
    		 * me how you were going to use this. :-)
    		 */
    		ret = NET_RX_DROP;
    	}
    
    out:
    	rcu_read_unlock();
    	return ret;
    }
    
    
    /* Network device is going away, flush any packets still pending  */
    static void flush_backlog(void *arg)
    {
    	struct net_device *dev = arg;
    	struct softnet_data *queue = &__get_cpu_var(softnet_data);
    	struct sk_buff *skb, *tmp;
    
    	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
    		if (skb->dev == dev) {
    			__skb_unlink(skb, &queue->input_pkt_queue);
    			kfree_skb(skb);
    		}
    }
    
    
    static int napi_gro_complete(struct sk_buff *skb)
    {
    	struct packet_type *ptype;
    	__be16 type = skb->protocol;
    	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
    	int err = -ENOENT;
    
    
    Herbert Xu's avatar
    Herbert Xu committed
    	if (NAPI_GRO_CB(skb)->count == 1)
    
    		goto out;
    
    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, head, list) {
    		if (ptype->type != type || ptype->dev || !ptype->gro_complete)
    			continue;
    
    		err = ptype->gro_complete(skb);
    		break;
    	}
    	rcu_read_unlock();
    
    	if (err) {
    		WARN_ON(&ptype->list == head);
    		kfree_skb(skb);
    		return NET_RX_SUCCESS;
    	}
    
    out:
    
    	skb_shinfo(skb)->gso_size = 0;
    
    	return netif_receive_skb(skb);
    }
    
    void napi_gro_flush(struct napi_struct *napi)
    {
    	struct sk_buff *skb, *next;
    
    	for (skb = napi->gro_list; skb; skb = next) {
    		next = skb->next;
    		skb->next = NULL;
    		napi_gro_complete(skb);
    	}
    
    
    	napi->gro_list = NULL;
    }
    EXPORT_SYMBOL(napi_gro_flush);
    
    
/* Return a pointer to @hlen bytes of packet data starting at the
 * current GRO offset, avoiding a copy when possible.
 *
 * Fast path: the span lies entirely in the linear area.  Otherwise,
 * when the span fits inside frag 0 and that page is directly mapped
 * (not highmem), point straight into the frag.  As a last resort fall
 * back to pskb_may_pull(), which may fail and yield NULL.
 */
void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
{
	unsigned int offset = skb_gro_offset(skb);

	/* From here on, hlen is an absolute end offset into the skb. */
	hlen += offset;
	if (hlen <= skb_headlen(skb))
		return skb->data + offset;

	if (unlikely(!skb_shinfo(skb)->nr_frags ||
		     skb_shinfo(skb)->frags[0].size <=
		     hlen - skb_headlen(skb) ||
		     PageHighMem(skb_shinfo(skb)->frags[0].page)))
		return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;

	return page_address(skb_shinfo(skb)->frags[0].page) +
	       skb_shinfo(skb)->frags[0].page_offset +
	       offset - skb_headlen(skb);
}
EXPORT_SYMBOL(skb_gro_header);
    
    
    int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    
    {
    	struct sk_buff **pp = NULL;
    	struct packet_type *ptype;
    	__be16 type = skb->protocol;
    	struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
    
    	int same_flow;
    
    
    	if (!(skb->dev->features & NETIF_F_GRO))
    		goto normal;
    
    
    	if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
    		goto normal;
    
    
    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, head, list) {
    		if (ptype->type != type || ptype->dev || !ptype->gro_receive)
    			continue;
    
    
    		skb_set_network_header(skb, skb_gro_offset(skb));
    
    		mac_len = skb->network_header - skb->mac_header;
    		skb->mac_len = mac_len;
    		NAPI_GRO_CB(skb)->same_flow = 0;
    		NAPI_GRO_CB(skb)->flush = 0;
    
    Herbert Xu's avatar
    Herbert Xu committed
    		NAPI_GRO_CB(skb)->free = 0;
    
    
    		pp = ptype->gro_receive(&napi->gro_list, skb);
    		break;
    	}
    	rcu_read_unlock();
    
    	if (&ptype->list == head)
    		goto normal;
    
    
    	same_flow = NAPI_GRO_CB(skb)->same_flow;
    
    	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
    
    	if (pp) {
    		struct sk_buff *nskb = *pp;
    
    		*pp = nskb->next;
    		nskb->next = NULL;
    		napi_gro_complete(nskb);
    
    	if (same_flow)
    
    	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
    
    	NAPI_GRO_CB(skb)->count = 1;
    
    	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
    
    	skb->next = napi->gro_list;
    	napi->gro_list = skb;
    
    	ret = GRO_HELD;
    
    pull:
    	if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
    		if (napi->gro_list == skb)
    			napi->gro_list = skb->next;
    		ret = GRO_DROP;
    	}
    
    
    	ret = GRO_NORMAL;
    	goto pull;
    
    EXPORT_SYMBOL(dev_gro_receive);
    
    static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    {
    	struct sk_buff *p;
    
    	for (p = napi->gro_list; p; p = p->next) {
    
    		NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
    			skb_mac_header(p), skb_gro_mac_header(skb));
    
    		NAPI_GRO_CB(p)->flush = 0;
    	}
    
    	return dev_gro_receive(napi, skb);
    }
    
    int napi_skb_finish(int ret, struct sk_buff *skb)
    
    	int err = NET_RX_SUCCESS;
    
    
    	if (netpoll_receive_skb(skb))
    		return NET_RX_DROP;
    
    
    	switch (ret) {
    	case GRO_NORMAL:
    
    Herbert Xu's avatar
    Herbert Xu committed
    		return netif_receive_skb(skb);
    
    
    	case GRO_DROP:
    		err = NET_RX_DROP;
    		/* fall through */
    
    	case GRO_MERGED_FREE:
    
    Herbert Xu's avatar
    Herbert Xu committed
    		kfree_skb(skb);
    		break;
    	}
    
    
    	return err;
    }
    EXPORT_SYMBOL(napi_skb_finish);
    
    int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
    {
    
    	skb_gro_reset_offset(skb);
    
    
    	return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
    
    }
    EXPORT_SYMBOL(napi_gro_receive);
    
    
    void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
    {
    	__skb_pull(skb, skb_headlen(skb));
    	skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
    
    	napi->skb = skb;
    }
    EXPORT_SYMBOL(napi_reuse_skb);
    
    struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
    				  struct napi_gro_fraginfo *info)
    
    Herbert Xu's avatar
    Herbert Xu committed
    {
    	struct net_device *dev = napi->dev;
    	struct sk_buff *skb = napi->skb;
    
    	struct ethhdr *eth;
    
    	skb_frag_t *frag;
    	int i;
    
    Herbert Xu's avatar
    Herbert Xu committed
    
    	napi->skb = NULL;
    
    	if (!skb) {
    		skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
    		if (!skb)
    			goto out;
    
    		skb_reserve(skb, NET_IP_ALIGN);
    	}
    
    	BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
    
    	frag = &info->frags[info->nr_frags - 1];
    
    	for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
    		skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
    				   frag->size);
    		frag++;
    	}
    
    Herbert Xu's avatar
    Herbert Xu committed
    	skb_shinfo(skb)->nr_frags = info->nr_frags;
    
    	skb->data_len = info->len;
    	skb->len += info->len;
    	skb->truesize += info->len;
    
    
    	skb_reset_mac_header(skb);
    	skb_gro_reset_offset(skb);
    
    	eth = skb_gro_header(skb, sizeof(*eth));
    	if (!eth) {
    
    		napi_reuse_skb(napi, skb);
    
    	skb_gro_pull(skb, sizeof(*eth));
    
    	/*
    	 * This works because the only protocols we care about don't require
    	 * special handling.  We'll fix it up properly at the end.
    	 */
    	skb->protocol = eth->h_proto;
    
    Herbert Xu's avatar
    Herbert Xu committed
    
    	skb->ip_summed = info->ip_summed;
    	skb->csum = info->csum;
    
    
    out:
    	return skb;
    }
    EXPORT_SYMBOL(napi_fraginfo_skb);
    
    
    int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
    
    	int err = NET_RX_SUCCESS;
    
    	if (netpoll_receive_skb(skb))
    
    	switch (ret) {
    	case GRO_NORMAL:
    
    	case GRO_HELD:
    		skb->protocol = eth_type_trans(skb, napi->dev);
    
    		if (ret == GRO_NORMAL)
    			return netif_receive_skb(skb);
    
    		skb_gro_pull(skb, -ETH_HLEN);
    		break;
    
    	case GRO_DROP:
    		err = NET_RX_DROP;
    		/* fall through */
    
    	case GRO_MERGED_FREE:
    		napi_reuse_skb(napi, skb);
    		break;
    	}
    
    Herbert Xu's avatar
    Herbert Xu committed
    
    	return err;
    }
    
    EXPORT_SYMBOL(napi_frags_finish);
    
    int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
    {
    	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
    
    	if (!skb)
    		return NET_RX_DROP;
    
    	return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
    }
    
    Herbert Xu's avatar
    Herbert Xu committed
    EXPORT_SYMBOL(napi_gro_frags);
    
    
    static int process_backlog(struct napi_struct *napi, int quota)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	int work = 0;
    	struct softnet_data *queue = &__get_cpu_var(softnet_data);
    	unsigned long start_time = jiffies;
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		struct sk_buff *skb;
    
    		local_irq_disable();
    		skb = __skb_dequeue(&queue->input_pkt_queue);
    
    		if (!skb) {
    			__napi_complete(napi);
    			local_irq_enable();
    			break;
    		}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		local_irq_enable();
    
    
    		napi_gro_receive(napi, skb);
    
    	} while (++work < quota && jiffies == start_time);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    /**
     * __napi_schedule - schedule for receive
    
     * @n: entry to schedule
    
     *
     * The entry's receive function will be scheduled to run
     */
    
    void __napi_schedule(struct napi_struct *n)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	local_irq_save(flags);
    	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    	local_irq_restore(flags);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
/* Remove @n from this CPU's poll list and release NAPI_STATE_SCHED.
 * Callers in this file invoke it with local interrupts disabled and
 * with n->gro_list already flushed (both BUG_ON-checked invariants).
 */
void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);

	list_del(&n->poll_list);
	/* Barrier before clearing SCHED — presumably so the list removal
	 * is visible before another CPU can re-schedule this instance.
	 */
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);
    
/* Finish a NAPI poll cycle: flush any held GRO packets, then dequeue
 * the instance and clear SCHED with local interrupts disabled.
 * Skipped entirely while netpoll is servicing the instance (NPSVC).
 */
void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case its running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	napi_gro_flush(n);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);
    
    void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
    		    int (*poll)(struct napi_struct *, int), int weight)
    {
    	INIT_LIST_HEAD(&napi->poll_list);
    
    	napi->gro_list = NULL;
    
    Herbert Xu's avatar
    Herbert Xu committed
    	napi->skb = NULL;
    
    	napi->poll = poll;
    	napi->weight = weight;
    	list_add(&napi->dev_list, &dev->napi_list);
    	napi->dev = dev;
    
    Herbert Xu's avatar
    Herbert Xu committed
    #ifdef CONFIG_NETPOLL
    
    	spin_lock_init(&napi->poll_lock);
    	napi->poll_owner = -1;
    #endif
    	set_bit(NAPI_STATE_SCHED, &napi->state);
    }
    EXPORT_SYMBOL(netif_napi_add);
    
    void netif_napi_del(struct napi_struct *napi)
    {
    	struct sk_buff *skb, *next;
    
    
    	list_del_init(&napi->dev_list);
    
    Herbert Xu's avatar
    Herbert Xu committed
    	kfree(napi->skb);
    
    
    	for (skb = napi->gro_list; skb; skb = next) {
    		next = skb->next;
    		skb->next = NULL;
    		kfree_skb(skb);
    	}
    
    	napi->gro_list = NULL;
    
    }
    EXPORT_SYMBOL(netif_napi_del);
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    static void net_rx_action(struct softirq_action *h)
    {
    
    	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
    
    	unsigned long time_limit = jiffies + 2;
    
    	int budget = netdev_budget;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	local_irq_disable();
    
    
    	while (!list_empty(list)) {
    		struct napi_struct *n;
    		int work, weight;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		/* If softirq window is exhuasted then punt.
    
    		 * Allow this to run for 2 jiffies since which will allow
    		 * an average latency of 1.5/HZ.
    
    		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    			goto softnet_break;
    
    		local_irq_enable();
    
    
    		/* Even though interrupts have been re-enabled, this
    		 * access is safe because interrupts can only add new
    		 * entries to the tail of this list, and only ->poll()
    		 * calls can remove this head entry from the list.
    		 */
    		n = list_entry(list->next, struct napi_struct, poll_list);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		/* This NAPI_STATE_SCHED test is for avoiding a race
    		 * with netpoll's poll_napi().  Only the entity which
    		 * obtains the lock and sees NAPI_STATE_SCHED set will
    		 * actually make the ->poll() call.  Therefore we avoid
    		 * accidently calling ->poll() when NAPI is not scheduled.
    		 */
    		work = 0;
    		if (test_bit(NAPI_STATE_SCHED, &n->state))
    			work = n->poll(n, weight);
    
    
    		WARN_ON_ONCE(work > weight);
    
    		budget -= work;
    
    		local_irq_disable();
    
    		/* Drivers must not modify the NAPI state if they
    		 * consume the entire weight.  In such cases this code
    		 * still "owns" the NAPI instance and therefore can
    		 * move the instance around on the list at-will.
    		 */
    
    		if (unlikely(work == weight)) {
    			if (unlikely(napi_disable_pending(n)))
    				__napi_complete(n);
    			else
    				list_move_tail(&n->poll_list, list);
    		}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	}
    out:
    
    #ifdef CONFIG_NET_DMA
    	/*
    	 * There may not be any more sk_buffs coming right now, so push
    	 * any pending DMA copies to hardware
    	 */
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	return;
    
    softnet_break:
    	__get_cpu_var(netdev_rx_stat).time_squeeze++;
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    	goto out;
    }
    
    static gifconf_func_t * gifconf_list [NPROTO];
    
    /**
     *	register_gifconf	-	register a SIOCGIF handler
     *	@family: Address family
     *	@gifconf: Function handler
     *
     *	Register protocol dependent address dumping routines. The handler
     *	that is passed must not be freed or reused until it has been replaced
     *	by another handler.
     */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	/* Overwrites any handler previously registered for this family;
	 * a NULL handler is skipped by dev_ifconf(), i.e. unregisters.
	 */
	gifconf_list[family] = gifconf;
	return 0;
}
    
    
    /*
     *	Map an interface index to its name (SIOCGIFNAME)
     */
    
    /*
     *	We need this ioctl for efficient implementation of the
     *	if_indextoname() function required by the IPv6 API.  Without
     *	it, we would have to search all the interfaces to find a
     *	match.  --pb
     */
    
    
    static int dev_ifname(struct net *net, struct ifreq __user *arg)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct net_device *dev;
    	struct ifreq ifr;
    
    	/*
    	 *	Fetch the caller's info block.
    	 */
    
    	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
    		return -EFAULT;
    
    	read_lock(&dev_base_lock);
    
    	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	if (!dev) {
    		read_unlock(&dev_base_lock);
    		return -ENODEV;
    	}
    
    	strcpy(ifr.ifr_name, dev->name);
    	read_unlock(&dev_base_lock);
    
    	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
    		return -EFAULT;
    	return 0;
    }
    
    /*
     *	Perform a SIOCGIFCONF call. This structure will change
     *	size eventually, and there is nothing I can do about it.
     *	Thus we will need a 'compatibility mode'.
     */
    
    
    static int dev_ifconf(struct net *net, char __user *arg)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	struct ifconf ifc;
    	struct net_device *dev;
    	char __user *pos;
    	int len;
    	int total;
    	int i;
    
    	/*
    	 *	Fetch the caller's info block.
    	 */
    
    	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
    		return -EFAULT;
    
    	pos = ifc.ifc_buf;
    	len = ifc.ifc_len;
    
    	/*
    	 *	Loop over the interfaces, and write an info block for each.
    	 */
    
    	total = 0;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    		for (i = 0; i < NPROTO; i++) {
    			if (gifconf_list[i]) {
    				int done;
    				if (!pos)
    					done = gifconf_list[i](dev, NULL, 0);
    				else
    					done = gifconf_list[i](dev, pos + total,
    							       len - total);
    				if (done < 0)
    					return -EFAULT;
    				total += done;
    			}
    		}
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    	/*
    	 *	All done.  Write the updated control block back to the caller.
    	 */
    	ifc.ifc_len = total;
    
    	/*
    	 * 	Both BSD and Solaris return 0 here, so we do too.
    	 */
    	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
    }
    
    #ifdef CONFIG_PROC_FS
    /*
     *	This is invoked by the /proc filesystem handler to display a device
     *	in detail.
     */
    
    void *dev_seq_start(struct seq_file *seq, loff_t *pos)
    
    	__acquires(dev_base_lock)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    
    	struct net *net = seq_file_net(seq);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	struct net_device *dev;
    
    
    	read_lock(&dev_base_lock);
    	if (!*pos)
    		return SEQ_START_TOKEN;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    		if (off++ == *pos)
    			return dev;
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
    
    	struct net *net = seq_file_net(seq);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    	++*pos;
    
    	return v == SEQ_START_TOKEN ?
    
    		first_net_device(net) : next_net_device((struct net_device *)v);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    void dev_seq_stop(struct seq_file *seq, void *v)
    
    	__releases(dev_base_lock)
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    {
    	read_unlock(&dev_base_lock);
    }
    
    static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
    {
    
    	const struct net_device_stats *stats = dev_get_stats(dev);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    
    
    	seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
    		   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
    		   dev->name, stats->rx_bytes, stats->rx_packets,
    		   stats->rx_errors,
    		   stats->rx_dropped + stats->rx_missed_errors,
    		   stats->rx_fifo_errors,
    		   stats->rx_length_errors + stats->rx_over_errors +
    		    stats->rx_crc_errors + stats->rx_frame_errors,
    		   stats->rx_compressed, stats->multicast,
    		   stats->tx_bytes, stats->tx_packets,
    		   stats->tx_errors, stats->tx_dropped,
    		   stats->tx_fifo_errors, stats->collisions,
    		   stats->tx_carrier_errors +
    		    stats->tx_aborted_errors +
    		    stats->tx_window_errors +
    		    stats->tx_heartbeat_errors,
    		   stats->tx_compressed);
    
    Linus Torvalds's avatar
    Linus Torvalds committed
    }
    
    /*
     *	Called from the PROCfs module. This now uses the new arbitrary sized
     *	/proc/net interface to create /proc/net/dev
     */