dev.c
    static atomic_t netstamp_needed = ATOMIC_INIT(0);
    
    void net_enable_timestamp(void)
    {
    	atomic_inc(&netstamp_needed);
    }
    
    void net_disable_timestamp(void)
    {
    	atomic_dec(&netstamp_needed);
    }
    
    static inline void net_timestamp(struct timeval *stamp)
    {
    	if (atomic_read(&netstamp_needed))
    		do_gettimeofday(stamp);
    	else {
    		stamp->tv_sec = 0;
    		stamp->tv_usec = 0;
    	}
    }
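/*
 * Illustrative sketch (not part of this file): a consumer that needs RX
 * timestamps, e.g. a packet socket, brackets its lifetime with the
 * enable/disable helpers above so that do_gettimeofday() is only paid
 * for while somebody actually cares.  The my_tap_* names are hypothetical.
 */
static int my_tap_open(void)
{
	net_enable_timestamp();		/* start stamping packets */
	return 0;
}

static void my_tap_close(void)
{
	net_disable_timestamp();	/* stop once the last user is gone */
}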
    
    /*
     *	Support routine. Sends outgoing frames to any network
     *	taps currently in use.
     */
    
    void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
    {
	struct packet_type *ptype;

	net_timestamp(&skb->stamp);
    
    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    		/* Never send packets back to the socket
    		 * they originated from - MvS (miquels@drinkel.ow.org)
    		 */
    		if ((ptype->dev == dev || !ptype->dev) &&
    		    (ptype->af_packet_priv == NULL ||
    		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
    			if (!skb2)
    				break;
    
			/* skb->nh should be correctly
			   set by the sender, so the check below is
			   just protection against buggy protocols.
			 */
    			skb2->mac.raw = skb2->data;
    
    			if (skb2->nh.raw < skb2->data ||
    			    skb2->nh.raw > skb2->tail) {
    				if (net_ratelimit())
    					printk(KERN_CRIT "protocol %04x is "
    					       "buggy, dev %s\n",
    					       skb2->protocol, dev->name);
    				skb2->nh.raw = skb2->data;
    			}
    
    			skb2->h.raw = skb2->nh.raw;
    			skb2->pkt_type = PACKET_OUTGOING;
    			ptype->func(skb2, skb->dev, ptype);
    		}
    	}
    	rcu_read_unlock();
    }
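/*
 * Sketch (not part of this file): the taps fed by dev_queue_xmit_nit()
 * above are packet_type entries registered on ptype_all via ETH_P_ALL.
 * The my_tap_* names are hypothetical; the handler owns the cloned skb
 * it is handed and must free it.
 */
static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt)
{
	/* inspect the clone here, then release it */
	kfree_skb(skb);
	return 0;
}

static struct packet_type my_tap = {
	.type = __constant_htons(ETH_P_ALL),	/* lands on ptype_all */
	.func = my_tap_rcv,
};

/* dev_add_pack(&my_tap) at init time, dev_remove_pack(&my_tap) at exit */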
    
    /*
     * Invalidate hardware checksum when packet is to be mangled, and
     * complete checksum manually on outgoing path.
     */
    int skb_checksum_help(struct sk_buff *skb, int inward)
    {
    	unsigned int csum;
    	int ret = 0, offset = skb->h.raw - skb->data;
    
    	if (inward) {
    		skb->ip_summed = CHECKSUM_NONE;
    		goto out;
    	}
    
    	if (skb_cloned(skb)) {
    		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
    		if (ret)
    			goto out;
    	}
    
    	if (offset > (int)skb->len)
    		BUG();
    	csum = skb_checksum(skb, offset, skb->len-offset, 0);
    
    	offset = skb->tail - skb->h.raw;
    	if (offset <= 0)
    		BUG();
    	if (skb->csum + 2 > offset)
    		BUG();
    
    	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
    	skb->ip_summed = CHECKSUM_NONE;
    out:	
    	return ret;
    }
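/*
 * Sketch (not part of this file): code that rewrites an outgoing packet
 * (NAT, a tunnel, ...) must not leave ip_summed as CHECKSUM_HW, because
 * the deferred checksum would then be computed over stale data.  A caller
 * resolves the checksum first, roughly like this (my_mangle_headers() is
 * a hypothetical stand-in for the actual rewrite):
 */
static int my_output_rewrite(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		int err = skb_checksum_help(skb, 0);	/* fold csum into packet */
		if (err)
			return err;
	}
	my_mangle_headers(skb);		/* hypothetical: now safe to edit */
	return 0;
}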
    
    #ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and can map all of memory.
 * 2. No high memory actually exists on this machine.
 */
    
    static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
    {
    	int i;
    
    	if (dev->features & NETIF_F_HIGHDMA)
    		return 0;
    
    	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
    		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
    			return 1;
    
    	return 0;
    }
    #else
    #define illegal_highdma(dev, skb)	(0)
    #endif
    
    extern void skb_release_data(struct sk_buff *);
    
    /* Keep head the same: replace data */
    int __skb_linearize(struct sk_buff *skb, int gfp_mask)
    {
    	unsigned int size;
    	u8 *data;
    	long offset;
    	struct skb_shared_info *ninfo;
    	int headerlen = skb->data - skb->head;
    	int expand = (skb->tail + skb->data_len) - skb->end;
    
    	if (skb_shared(skb))
    		BUG();
    
    	if (expand <= 0)
    		expand = 0;
    
    	size = skb->end - skb->head + expand;
    	size = SKB_DATA_ALIGN(size);
    	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
    	if (!data)
    		return -ENOMEM;
    
    	/* Copy entire thing */
    	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
    		BUG();
    
    	/* Set up shinfo */
    	ninfo = (struct skb_shared_info*)(data + size);
    	atomic_set(&ninfo->dataref, 1);
    	ninfo->tso_size = skb_shinfo(skb)->tso_size;
    	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
    	ninfo->nr_frags = 0;
    	ninfo->frag_list = NULL;
    
    	/* Offset between the two in bytes */
    	offset = data - skb->head;
    
    	/* Free old data. */
    	skb_release_data(skb);
    
    	skb->head = data;
    	skb->end  = data + size;
    
    	/* Set up new pointers */
    	skb->h.raw   += offset;
    	skb->nh.raw  += offset;
    	skb->mac.raw += offset;
    	skb->tail    += offset;
    	skb->data    += offset;
    
    	/* We are no longer a clone, even if we were. */
    	skb->cloned    = 0;
    
    	skb->tail     += skb->data_len;
    	skb->data_len  = 0;
    	return 0;
    }
    
    #define HARD_TX_LOCK(dev, cpu) {			\
    	if ((dev->features & NETIF_F_LLTX) == 0) {	\
    		spin_lock(&dev->xmit_lock);		\
    		dev->xmit_lock_owner = cpu;		\
    	}						\
    }
    
    #define HARD_TX_UNLOCK(dev) {				\
    	if ((dev->features & NETIF_F_LLTX) == 0) {	\
    		dev->xmit_lock_owner = -1;		\
    		spin_unlock(&dev->xmit_lock);		\
    	}						\
    }
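/*
 * Sketch (not part of this file): a driver that advertises NETIF_F_LLTX
 * opts out of the xmit_lock taken by the macros above and must do its own
 * locking inside hard_start_xmit.  The mydrv_* names are hypothetical.
 */
struct mydrv_priv {
	spinlock_t tx_lock;		/* driver-private, not dev->xmit_lock */
};

static int mydrv_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct mydrv_priv *priv = dev->priv;

	spin_lock(&priv->tx_lock);
	/* ... hand skb to the hardware ... */
	spin_unlock(&priv->tx_lock);
	return 0;			/* 0 means the skb was consumed */
}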
    
    /**
     *	dev_queue_xmit - transmit a buffer
     *	@skb: buffer to transmit
     *
     *	Queue a buffer for transmission to a network device. The caller must
     *	have set the device and priority and built the buffer before calling
     *	this function. The function can be called from an interrupt.
     *
     *	A negative errno code is returned on a failure. A success does not
     *	guarantee the frame will be transmitted as it may be dropped due
     *	to congestion or traffic shaping.
    
     *
     * -----------------------------------------------------------------------------------
     *      I notice this method can also return errors from the queue disciplines,
     *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
     *      be positive.
     *
     *      Regardless of the return value, the skb is consumed, so it is currently
     *      difficult to retry a send to this method.  (You can bump the ref count
     *      before sending to hold a reference for retry if you are careful.)
     *
     *      When calling this method, interrupts MUST be enabled.  This is because
     *      the BH enable code must have IRQs enabled so that it will not deadlock.
     *          --BLG
    
     */
    
    int dev_queue_xmit(struct sk_buff *skb)
    {
    	struct net_device *dev = skb->dev;
    	struct Qdisc *q;
    	int rc = -ENOMEM;
    
    	if (skb_shinfo(skb)->frag_list &&
    	    !(dev->features & NETIF_F_FRAGLIST) &&
    	    __skb_linearize(skb, GFP_ATOMIC))
    		goto out_kfree_skb;
    
    	/* Fragmented skb is linearized if device does not support SG,
    	 * or if at least one of fragments is in highmem and device
    	 * does not support DMA from it.
    	 */
    	if (skb_shinfo(skb)->nr_frags &&
    	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
    	    __skb_linearize(skb, GFP_ATOMIC))
    		goto out_kfree_skb;
    
    	/* If packet is not checksummed and device does not support
    	 * checksumming for this protocol, complete checksumming here.
    	 */
    	if (skb->ip_summed == CHECKSUM_HW &&
    	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
    	     (!(dev->features & NETIF_F_IP_CSUM) ||
    	      skb->protocol != htons(ETH_P_IP))))
    	      	if (skb_checksum_help(skb, 0))
    	      		goto out_kfree_skb;
    
    	/* Disable soft irqs for various locks below. Also 
    	 * stops preemption for RCU. 
    	 */
    	local_bh_disable(); 
    
    	/* Updates of qdisc are serialized by queue_lock. 
    	 * The struct Qdisc which is pointed to by qdisc is now a 
    	 * rcu structure - it may be accessed without acquiring 
    	 * a lock (but the structure may be stale.) The freeing of the
    	 * qdisc will be deferred until it's known that there are no 
    	 * more references to it.
    	 * 
    	 * If the qdisc has an enqueue function, we still need to 
    	 * hold the queue_lock before calling it, since queue_lock
    	 * also serializes access to the device queue.
    	 */
    
    	q = rcu_dereference(dev->qdisc);
    #ifdef CONFIG_NET_CLS_ACT
    	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
    #endif
    	if (q->enqueue) {
    		/* Grab device queue */
    		spin_lock(&dev->queue_lock);
    
    		rc = q->enqueue(skb, q);
    
    		qdisc_run(dev);
    
    		spin_unlock(&dev->queue_lock);
    		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
    		goto out;
    	}
    
	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that xmit_lock protection is necessary here.
	   (e.g. loopback and IP tunnels are clean, ignoring statistics
	   counters.)
	   However, it is possible that they rely on the protection
	   we provide here.

	   So check for recursion and take the lock; this is not prone
	   to deadlocks. Using a trivial noqueue qdisc instead would be
	   even simpler 8)
	 */
    	if (dev->flags & IFF_UP) {
    		int cpu = smp_processor_id(); /* ok because BHs are off */
    
    		if (dev->xmit_lock_owner != cpu) {
    
    			HARD_TX_LOCK(dev, cpu);
    
    			if (!netif_queue_stopped(dev)) {
    				if (netdev_nit)
    					dev_queue_xmit_nit(skb, dev);
    
    				rc = 0;
    				if (!dev->hard_start_xmit(skb, dev)) {
    					HARD_TX_UNLOCK(dev);
    					goto out;
    				}
    			}
    			HARD_TX_UNLOCK(dev);
    			if (net_ratelimit())
    				printk(KERN_CRIT "Virtual device %s asks to "
    				       "queue packet!\n", dev->name);
    		} else {
    			/* Recursion is detected! It is possible,
    			 * unfortunately */
    			if (net_ratelimit())
    				printk(KERN_CRIT "Dead loop on virtual device "
    				       "%s, fix it urgently!\n", dev->name);
    		}
    	}
    
    	rc = -ENETDOWN;
    	local_bh_enable();
    
    out_kfree_skb:
    	kfree_skb(skb);
    	return rc;
    out:
    	local_bh_enable();
    	return rc;
    }
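/*
 * Sketch (not part of this file): a typical caller sets skb->dev and
 * skb->priority and hands the buffer off; dev_queue_xmit() consumes the
 * skb whatever the outcome.  As the comment above notes, holding an extra
 * reference with skb_get() beforehand is what makes a retry possible.
 * Hypothetical helper:
 */
static int my_send(struct sk_buff *skb, struct net_device *dev)
{
	int rc;

	skb->dev = dev;
	skb->priority = TC_PRIO_CONTROL;

	skb_get(skb);			/* keep a reference for a possible retry */
	rc = dev_queue_xmit(skb);
	if (rc >= 0) {			/* sent, or dropped by the qdisc */
		kfree_skb(skb);		/* drop our extra reference */
		return rc;
	}
	/* negative errno: our reference is still valid and may be resent */
	return rc;
}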
    
    
    /*=======================================================================
    			Receiver routines
      =======================================================================*/
    
    int netdev_max_backlog = 300;
    int weight_p = 64;            /* old backlog weight */
    
    DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
    
    
    /**
     *	netif_rx	-	post buffer to the network code
     *	@skb: buffer to post
     *
     *	This function receives a packet from a device driver and queues it for
     *	the upper (protocol) levels to process.  It always succeeds. The buffer
     *	may be dropped during processing for congestion control or by the
     *	protocol layers.
     *
     *	return values:
     *	NET_RX_SUCCESS	(no congestion)
     *	NET_RX_CN_LOW   (low congestion)
     *	NET_RX_CN_MOD   (moderate congestion)
     *	NET_RX_CN_HIGH  (high congestion)
     *	NET_RX_DROP     (packet was dropped)
     *
     */
    
    int netif_rx(struct sk_buff *skb)
    {
    	struct softnet_data *queue;
    	unsigned long flags;
    
    	/* if netpoll wants it, pretend we never saw it */
    	if (netpoll_rx(skb))
    		return NET_RX_DROP;
    
    	if (!skb->stamp.tv_sec)
    		net_timestamp(&skb->stamp);
    
	/*
	 * The code is laid out so that the path is shortest when the
	 * CPU is congested but still operating.
	 */
    	local_irq_save(flags);
    	queue = &__get_cpu_var(softnet_data);
    
    	__get_cpu_var(netdev_rx_stat).total++;
    	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
    		if (queue->input_pkt_queue.qlen) {
    enqueue:
    			dev_hold(skb->dev);
    			__skb_queue_tail(&queue->input_pkt_queue, skb);
    			local_irq_restore(flags);
    
			return NET_RX_SUCCESS;
    		}
    
    		netif_rx_schedule(&queue->backlog_dev);
    		goto enqueue;
    	}
    
    	__get_cpu_var(netdev_rx_stat).dropped++;
    	local_irq_restore(flags);
    
    	kfree_skb(skb);
    	return NET_RX_DROP;
    }
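/*
 * Sketch (not part of this file): a non-NAPI driver's RX interrupt handler
 * builds an skb per completed buffer and feeds it to the stack through
 * netif_rx(), which only queues it for later softirq processing.  The
 * mydrv_rx() helper and its arguments are hypothetical.
 */
static void mydrv_rx(struct net_device *dev, void *buf, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (!skb)
		return;				/* out of memory: drop the frame */

	skb_reserve(skb, 2);			/* align the IP header */
	memcpy(skb_put(skb, len), buf, len);
	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);				/* queue for the RX softirq */
}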
    
    int netif_rx_ni(struct sk_buff *skb)
    {
    	int err;
    
    	preempt_disable();
    	err = netif_rx(skb);
    	if (local_softirq_pending())
    		do_softirq();
    	preempt_enable();
    
    	return err;
    }
    
    EXPORT_SYMBOL(netif_rx_ni);
    
    static __inline__ void skb_bond(struct sk_buff *skb)
    {
    	struct net_device *dev = skb->dev;
    
    	if (dev->master) {
    		skb->real_dev = skb->dev;
    		skb->dev = dev->master;
    	}
    }
    
    static void net_tx_action(struct softirq_action *h)
    {
    	struct softnet_data *sd = &__get_cpu_var(softnet_data);
    
    	if (sd->completion_queue) {
    		struct sk_buff *clist;
    
    		local_irq_disable();
    		clist = sd->completion_queue;
    		sd->completion_queue = NULL;
    		local_irq_enable();
    
    		while (clist) {
    			struct sk_buff *skb = clist;
    			clist = clist->next;
    
    			BUG_TRAP(!atomic_read(&skb->users));
    			__kfree_skb(skb);
    		}
    	}
    
    	if (sd->output_queue) {
    		struct net_device *head;
    
    		local_irq_disable();
    		head = sd->output_queue;
    		sd->output_queue = NULL;
    		local_irq_enable();
    
    		while (head) {
    			struct net_device *dev = head;
    			head = head->next_sched;
    
    			smp_mb__before_clear_bit();
    			clear_bit(__LINK_STATE_SCHED, &dev->state);
    
    			if (spin_trylock(&dev->queue_lock)) {
    				qdisc_run(dev);
    				spin_unlock(&dev->queue_lock);
    			} else {
    				netif_schedule(dev);
    			}
    		}
    	}
    }
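/*
 * Sketch (not part of this file): the completion_queue drained above is
 * fed by drivers that finish with an skb in hard-IRQ context, where
 * kfree_skb() cannot be used; dev_kfree_skb_irq() chains the buffer onto
 * the per-CPU list and raises NET_TX_SOFTIRQ instead.  The mydrv_*
 * helpers below are hypothetical.
 */
static void mydrv_tx_complete(struct net_device *dev)
{
	struct sk_buff *skb;

	while ((skb = mydrv_next_done_skb(dev)) != NULL)
		dev_kfree_skb_irq(skb);	/* deferred free via completion_queue */
}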
    
    static __inline__ int deliver_skb(struct sk_buff *skb,
    				  struct packet_type *pt_prev)
    {
    	atomic_inc(&skb->users);
    	return pt_prev->func(skb, skb->dev, pt_prev);
    }
    
    #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
    int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
    struct net_bridge;
    struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
    						unsigned char *addr);
    void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
    
    static __inline__ int handle_bridge(struct sk_buff **pskb,
    				    struct packet_type **pt_prev, int *ret)
    {
    	struct net_bridge_port *port;
    
    	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
    	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
    		return 0;
    
    	if (*pt_prev) {
    		*ret = deliver_skb(*pskb, *pt_prev);
    		*pt_prev = NULL;
    	} 
    	
    	return br_handle_frame_hook(port, pskb);
    }
    #else
    #define handle_bridge(skb, pt_prev, ret)	(0)
    #endif
    
    #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * whenever CONFIG_NET_CLS_ACT is?  Otherwise we pay for a few useless
 * instructions (a compare and two extra stores) when it is not
 * configured but CONFIG_NET_CLS_ACT is.
 * NOTE: This doesn't remove any functionality; without the ingress
 * scheduler you simply can't add policies on ingress.
 *
 */
    static int ing_filter(struct sk_buff *skb) 
    {
    	struct Qdisc *q;
    	struct net_device *dev = skb->dev;
    	int result = TC_ACT_OK;
    	
    	if (dev->qdisc_ingress) {
    		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
    		if (MAX_RED_LOOP < ttl++) {
    			printk("Redir loop detected Dropping packet (%s->%s)\n",
    				skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
    			return TC_ACT_SHOT;
    		}
    
    		skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
    
    		skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
    		if (NULL == skb->input_dev) {
    			skb->input_dev = skb->dev;
    			printk("ing_filter:  fixed  %s out %s\n",skb->input_dev->name,skb->dev->name);
    		}
    		spin_lock(&dev->ingress_lock);
    		if ((q = dev->qdisc_ingress) != NULL)
    			result = q->enqueue(skb, q);
    		spin_unlock(&dev->ingress_lock);
    
    	}
    
    	return result;
    }
    #endif
    
    int netif_receive_skb(struct sk_buff *skb)
    {
    	struct packet_type *ptype, *pt_prev;
    	int ret = NET_RX_DROP;
    	unsigned short type;
    
    	/* if we've gotten here through NAPI, check netpoll */
    	if (skb->dev->poll && netpoll_rx(skb))
    		return NET_RX_DROP;
    
    	if (!skb->stamp.tv_sec)
    		net_timestamp(&skb->stamp);
    
    	skb_bond(skb);
    
    	__get_cpu_var(netdev_rx_stat).total++;
    
    	skb->h.raw = skb->nh.raw = skb->data;
    	skb->mac_len = skb->nh.raw - skb->mac.raw;
    
    	pt_prev = NULL;
    
    	rcu_read_lock();
    
    #ifdef CONFIG_NET_CLS_ACT
    	if (skb->tc_verd & TC_NCLS) {
    		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
    		goto ncls;
    	}
    #endif
    
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    		if (!ptype->dev || ptype->dev == skb->dev) {
    			if (pt_prev) 
    				ret = deliver_skb(skb, pt_prev);
    			pt_prev = ptype;
    		}
    	}
    
    #ifdef CONFIG_NET_CLS_ACT
    	if (pt_prev) {
    		ret = deliver_skb(skb, pt_prev);
		pt_prev = NULL; /* no one else should process this after */
    	} else {
    		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
    	}
    
    	ret = ing_filter(skb);
    
    	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
    		kfree_skb(skb);
    		goto out;
    	}
    
    	skb->tc_verd = 0;
    ncls:
    #endif
    
    	handle_diverter(skb);
    
    	if (handle_bridge(&skb, &pt_prev, &ret))
    		goto out;
    
    	type = skb->protocol;
    	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
    		if (ptype->type == type &&
    		    (!ptype->dev || ptype->dev == skb->dev)) {
    			if (pt_prev) 
    				ret = deliver_skb(skb, pt_prev);
    			pt_prev = ptype;
    		}
    	}
    
    	if (pt_prev) {
    		ret = pt_prev->func(skb, skb->dev, pt_prev);
    	} else {
    		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
    		ret = NET_RX_DROP;
    	}
    
    out:
    	rcu_read_unlock();
    	return ret;
    }
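/*
 * Sketch (not part of this file): NAPI drivers call netif_receive_skb()
 * straight from their ->poll() method instead of netif_rx(), since they
 * already run in softirq context.  A hypothetical mydrv_poll() honouring
 * the quota/budget contract enforced by net_rx_action() below:
 */
static int mydrv_poll(struct net_device *dev, int *budget)
{
	int limit = min(dev->quota, *budget);
	int work = 0;

	while (work < limit && mydrv_rx_ready(dev)) {	/* hypothetical helpers */
		netif_receive_skb(mydrv_build_skb(dev));
		work++;
	}

	dev->quota -= work;
	*budget -= work;

	if (!mydrv_rx_ready(dev)) {
		netif_rx_complete(dev);	/* take ourselves off the poll list */
		return 0;		/* done for now */
	}
	return 1;			/* more work pending: stay scheduled */
}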
    
    static int process_backlog(struct net_device *backlog_dev, int *budget)
    {
    	int work = 0;
    	int quota = min(backlog_dev->quota, *budget);
    	struct softnet_data *queue = &__get_cpu_var(softnet_data);
    	unsigned long start_time = jiffies;
    
    
    	backlog_dev->weight = weight_p;
    
    	for (;;) {
    		struct sk_buff *skb;
    		struct net_device *dev;
    
    		local_irq_disable();
    		skb = __skb_dequeue(&queue->input_pkt_queue);
    		if (!skb)
    			goto job_done;
    		local_irq_enable();
    
    		dev = skb->dev;
    
    		netif_receive_skb(skb);
    
    		dev_put(dev);
    
    		work++;
    
    		if (work >= quota || jiffies - start_time > 1)
    			break;
    
    	}
    
    	backlog_dev->quota -= work;
    	*budget -= work;
    	return -1;
    
    job_done:
    	backlog_dev->quota -= work;
    	*budget -= work;
    
    	list_del(&backlog_dev->poll_list);
    	smp_mb__before_clear_bit();
    	netif_poll_enable(backlog_dev);
    
    	local_irq_enable();
    	return 0;
    }
    
    static void net_rx_action(struct softirq_action *h)
    {
    	struct softnet_data *queue = &__get_cpu_var(softnet_data);
    	unsigned long start_time = jiffies;
    	int budget = netdev_max_backlog;
    
    	
    	local_irq_disable();
    
    	while (!list_empty(&queue->poll_list)) {
    		struct net_device *dev;
    
    		if (budget <= 0 || jiffies - start_time > 1)
    			goto softnet_break;
    
    		local_irq_enable();
    
    		dev = list_entry(queue->poll_list.next,
    				 struct net_device, poll_list);
    		netpoll_poll_lock(dev);
    
    		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
    			netpoll_poll_unlock(dev);
    			local_irq_disable();
    			list_del(&dev->poll_list);
    			list_add_tail(&dev->poll_list, &queue->poll_list);
    			if (dev->quota < 0)
    				dev->quota += dev->weight;
    			else
    				dev->quota = dev->weight;
    		} else {
    			netpoll_poll_unlock(dev);
    			dev_put(dev);
    			local_irq_disable();
    		}
    	}
    out:
    	local_irq_enable();
    	return;
    
    softnet_break:
    	__get_cpu_var(netdev_rx_stat).time_squeeze++;
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    	goto out;
    }
    
    static gifconf_func_t * gifconf_list [NPROTO];
    
    /**
     *	register_gifconf	-	register a SIOCGIF handler
     *	@family: Address family
     *	@gifconf: Function handler
     *
     *	Register protocol dependent address dumping routines. The handler
     *	that is passed must not be freed or reused until it has been replaced
     *	by another handler.
     */
    int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
    {
    	if (family >= NPROTO)
    		return -EINVAL;
    	gifconf_list[family] = gifconf;
    	return 0;
    }
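/*
 * Sketch (not part of this file): an address family registers its dumper
 * once at protocol-init time.  PF_MYPROTO and my_gifconf() below are
 * hypothetical placeholders; IPv4 registers its real handler the same way.
 */
#define PF_MYPROTO 27	/* made-up family number, for illustration only */

static int my_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* write one ifreq per address this family owns on dev and return
	 * the number of bytes written (or needed, when buf is NULL) */
	return 0;
}

static int __init my_proto_init(void)
{
	return register_gifconf(PF_MYPROTO, my_gifconf);
}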
    
    
    /*
     *	Map an interface index to its name (SIOCGIFNAME)
     */
    
    /*
     *	We need this ioctl for efficient implementation of the
     *	if_indextoname() function required by the IPv6 API.  Without
     *	it, we would have to search all the interfaces to find a
     *	match.  --pb
     */
    
    static int dev_ifname(struct ifreq __user *arg)
    {
    	struct net_device *dev;
    	struct ifreq ifr;
    
    	/*
    	 *	Fetch the caller's info block.
    	 */
    
    	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
    		return -EFAULT;
    
    	read_lock(&dev_base_lock);
    	dev = __dev_get_by_index(ifr.ifr_ifindex);
    	if (!dev) {
    		read_unlock(&dev_base_lock);
    		return -ENODEV;
    	}
    
    	strcpy(ifr.ifr_name, dev->name);
    	read_unlock(&dev_base_lock);
    
    	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
    		return -EFAULT;
    	return 0;
    }
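/*
 * Userspace sketch (not kernel code): the ioctl served by dev_ifname()
 * above is what if_indextoname() ultimately relies on.  Minimal
 * illustration against an already-open socket:
 *
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <string.h>
 *
 *	int index_to_name(int sock, int ifindex, char name[IFNAMSIZ])
 *	{
 *		struct ifreq ifr;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		ifr.ifr_ifindex = ifindex;
 *		if (ioctl(sock, SIOCGIFNAME, &ifr) < 0)
 *			return -1;
 *		memcpy(name, ifr.ifr_name, IFNAMSIZ);
 *		return 0;
 *	}
 */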
    
    /*
     *	Perform a SIOCGIFCONF call. This structure will change
     *	size eventually, and there is nothing I can do about it.
     *	Thus we will need a 'compatibility mode'.
     */
    
    static int dev_ifconf(char __user *arg)
    {
    	struct ifconf ifc;
    	struct net_device *dev;
    	char __user *pos;
    	int len;
    	int total;
    	int i;
    
    	/*
    	 *	Fetch the caller's info block.
    	 */
    
    	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
    		return -EFAULT;
    
    	pos = ifc.ifc_buf;
    	len = ifc.ifc_len;
    
    	/*
    	 *	Loop over the interfaces, and write an info block for each.
    	 */
    
    	total = 0;
    	for (dev = dev_base; dev; dev = dev->next) {
    		for (i = 0; i < NPROTO; i++) {
    			if (gifconf_list[i]) {
    				int done;
    				if (!pos)
    					done = gifconf_list[i](dev, NULL, 0);
    				else
    					done = gifconf_list[i](dev, pos + total,
    							       len - total);
    				if (done < 0)
    					return -EFAULT;
    				total += done;
    			}
    		}
      	}
    
    	/*
    	 *	All done.  Write the updated control block back to the caller.
    	 */
    	ifc.ifc_len = total;
    
    	/*
    	 * 	Both BSD and Solaris return 0 here, so we do too.
    	 */
    	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
    }
    
    #ifdef CONFIG_PROC_FS
    /*
     *	This is invoked by the /proc filesystem handler to display a device
     *	in detail.
     */
    static __inline__ struct net_device *dev_get_idx(loff_t pos)
    {
    	struct net_device *dev;
    	loff_t i;
    
    	for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
    
    	return i == pos ? dev : NULL;
    }
    
    void *dev_seq_start(struct seq_file *seq, loff_t *pos)
    {
    	read_lock(&dev_base_lock);
    	return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
    }
    
    void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
    	++*pos;
    	return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
    }
    
    void dev_seq_stop(struct seq_file *seq, void *v)
    {
    	read_unlock(&dev_base_lock);
    }
    
    static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
    {
    	if (dev->get_stats) {
    		struct net_device_stats *stats = dev->get_stats(dev);
    
    		seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
    				"%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
    			   dev->name, stats->rx_bytes, stats->rx_packets,
    			   stats->rx_errors,
    			   stats->rx_dropped + stats->rx_missed_errors,
    			   stats->rx_fifo_errors,
    			   stats->rx_length_errors + stats->rx_over_errors +
    			     stats->rx_crc_errors + stats->rx_frame_errors,
    			   stats->rx_compressed, stats->multicast,
    			   stats->tx_bytes, stats->tx_packets,
    			   stats->tx_errors, stats->tx_dropped,
    			   stats->tx_fifo_errors, stats->collisions,
    			   stats->tx_carrier_errors +
    			     stats->tx_aborted_errors +
    			     stats->tx_window_errors +
    			     stats->tx_heartbeat_errors,
    			   stats->tx_compressed);
    	} else
    		seq_printf(seq, "%6s: No statistics available.\n", dev->name);
    }
    
    /*
     *	Called from the PROCfs module. This now uses the new arbitrary sized
     *	/proc/net interface to create /proc/net/dev
     */
    static int dev_seq_show(struct seq_file *seq, void *v)
    {
    	if (v == SEQ_START_TOKEN)
    		seq_puts(seq, "Inter-|   Receive                            "
    			      "                    |  Transmit\n"
    			      " face |bytes    packets errs drop fifo frame "
    			      "compressed multicast|bytes    packets errs "
    			      "drop fifo colls carrier compressed\n");
    	else
    		dev_seq_printf_stats(seq, v);
    	return 0;
    }
    
    static struct netif_rx_stats *softnet_get_online(loff_t *pos)
    {
    	struct netif_rx_stats *rc = NULL;
    
    	while (*pos < NR_CPUS)
    	       	if (cpu_online(*pos)) {
    			rc = &per_cpu(netdev_rx_stat, *pos);
    			break;
    		} else
    			++*pos;
    	return rc;
    }
    
    static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
    {
    	return softnet_get_online(pos);
    }
    
    static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
    	++*pos;
    	return softnet_get_online(pos);
    }
    
    static void softnet_seq_stop(struct seq_file *seq, void *v)
    {
    }
    
    static int softnet_seq_show(struct seq_file *seq, void *v)
    {
    	struct netif_rx_stats *s = v;
    
	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   s->total, s->dropped, s->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   s->cpu_collision);

    	return 0;
    }
    
    static struct seq_operations dev_seq_ops = {
    	.start = dev_seq_start,
    	.next  = dev_seq_next,
    	.stop  = dev_seq_stop,
    	.show  = dev_seq_show,
    };
    
    static int dev_seq_open(struct inode *inode, struct file *file)
    {
    	return seq_open(file, &dev_seq_ops);
    }
    
    static struct file_operations dev_seq_fops = {
    	.owner	 = THIS_MODULE,
    	.open    = dev_seq_open,
    	.read    = seq_read,
    	.llseek  = seq_lseek,
    	.release = seq_release,
    };
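/*
 * Sketch: later in this file, dev_proc_init() wires these operations up to
 * /proc/net/dev using the 2.6-era proc helper, roughly:
 *
 *	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
 *		goto out;
 */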
    
    static struct seq_operations softnet_seq_ops = {
    	.start = softnet_seq_start,
    	.next  = softnet_seq_next,
    	.stop  = softnet_seq_stop,
    	.show  = softnet_seq_show,
    };
    
    static int softnet_seq_open(struct inode *inode, struct file *file)
    {
    	return seq_open(file, &softnet_seq_ops);
    }