	synchronize_rcu();
    
    	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		}
	}

    	return err;
    }
    
    
    /**
     *	dev_set_alias - change ifalias of a device
     *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device.
     */
    int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
    {
    	ASSERT_RTNL();
    
    	if (len >= IFALIASZ)
    		return -EINVAL;
    	if (!len) {
    		if (dev->ifalias) {
    			kfree(dev->ifalias);
    			dev->ifalias = NULL;
    		}
    		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
    	if (!dev->ifalias)
    		return -ENOMEM;
    
    	strlcpy(dev->ifalias, alias, len+1);
    	return len;
    }
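
/*
 * Illustrative usage sketch (not part of the original file): a caller
 * that already holds the RTNL lock, as dev_set_alias() asserts, could
 * set and later clear an alias like this:
 *
 *	err = dev_set_alias(dev, "uplink0", strlen("uplink0"));
 *	...
 *	err = dev_set_alias(dev, NULL, 0);
 *
 * A non-negative return value is the number of bytes kept; len == 0
 * frees any existing alias.
 */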

/**
 *	netdev_features_change - device changes features
     *	@dev: device to cause notification
     *
     *	Called to indicate a device has changed features.
     */
    void netdev_features_change(struct net_device *dev)
    {
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
    }
    EXPORT_SYMBOL(netdev_features_change);
    
    /**
     *	netdev_state_change - device changes state
     *	@dev: device to cause notification
     *
     *	Called to indicate a device has changed state. This function calls
     *	the notifier chains for netdev_chain and sends a NEWLINK message
     *	to the routing socket.
     */
    void netdev_state_change(struct net_device *dev)
    {
    	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
    		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
    	}
    }
    
    EXPORT_SYMBOL(netdev_state_change);
    
int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
    	return call_netdevice_notifiers(event, dev);
    
    }
    EXPORT_SYMBOL(netdev_bonding_change);
    
    
/**
 *	dev_load	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
     *
     *	If a network interface is not present and the process has suitable
     *	privileges this function loads the module. If module loading is not
     *	available in this kernel then it becomes a nop.
     */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	if (!dev && capable(CAP_NET_ADMIN))
    		request_module("%s", name);
    }
    
    EXPORT_SYMBOL(dev_load);
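
/*
 * Illustrative usage sketch (an assumption, not taken from this file):
 * ioctl handlers call dev_load() so that a request naming an absent
 * interface can pull in the matching module before the lookup proper:
 *
 *	dev_load(net, ifr.ifr_name);
 *	rtnl_lock();
 *	ret = do_ioctl_handler(net, &ifr, cmd);
 *	rtnl_unlock();
 *
 * do_ioctl_handler() is a hypothetical stand-in for whatever handler
 * runs after the load.
 */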
    static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

    	/*
    	 *	Is it even present?
    	 */
    	if (!netif_device_present(dev))
    		return -ENODEV;
    	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
    	ret = notifier_to_errno(ret);
    	if (ret)
    		return ret;
    	/*
    	 *	Call device private open method
    	 */
    	set_bit(__LINK_STATE_START, &dev->state);
    
    	if (ops->ndo_validate_addr)
    		ret = ops->ndo_validate_addr(dev);
    
    	if (!ret && ops->ndo_open)
    		ret = ops->ndo_open(dev);
    
	/*
	 *	If it went open OK then:
	 */

    	if (ret)
    		clear_bit(__LINK_STATE_START, &dev->state);
    	else {
    		/*
    		 *	Set the flags.
    		 */
    		dev->flags |= IFF_UP;
    
    		/*
    		 *	Initialize multicasting status
    		 */
    
    		dev_set_rx_mode(dev);
    		/*
    		 *	Wakeup transmit queue engine
    		 */
    		dev_activate(dev);
    	}
    
    	return ret;
    }
    
/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
    int dev_open(struct net_device *dev)
    {
    	int ret;
    
    	/*
    	 *	Is it already up?
    	 */
    	if (dev->flags & IFF_UP)
    		return 0;
    
    	/*
    	 *	Open device
    	 */
    	ret = __dev_open(dev);
    	if (ret < 0)
    		return ret;
    
    	/*
    	 *	... and announce new interface.
    	 */
    	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
    	call_netdevice_notifiers(NETDEV_UP, dev);
    
    	return ret;
    }
    EXPORT_SYMBOL(dev_open);
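
/*
 * Illustrative usage sketch (not part of the original file): code that
 * owns the RTNL lock, e.g. a hypothetical bonding-style master, brings
 * a device up and later down again:
 *
 *	rtnl_lock();
 *	err = dev_open(slave_dev);
 *	...
 *	dev_close(slave_dev);
 *	rtnl_unlock();
 *
 * Both calls are nops when the device is already in the requested
 * state, so they are safe to issue unconditionally.
 */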
    
    static int __dev_close(struct net_device *dev)
    {
	const struct net_device_ops *ops = dev->netdev_ops;

    	/*
    	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
    	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

    	clear_bit(__LINK_STATE_START, &dev->state);
    
	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

    	/*
    	 *	Call the device specific close. This cannot fail.
    	 *	Only if device is UP
    	 *
    	 *	We allow it to be called even after a DETACH hot-plug
    	 *	event.
    	 */
    
    	if (ops->ndo_stop)
    		ops->ndo_stop(dev);
    	/*
    	 *	Device is now down.
    	 */
    
    	dev->flags &= ~IFF_UP;
    
	/*
	 *	Shutdown NET_DMA
	 */
    	net_dmaengine_put();
    
    	return 0;
    }
    
    /**
     *	dev_close - shutdown an interface.
     *	@dev: device to shutdown
     *
     *	This function moves an active device into down state. A
     *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
     *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
     *	chain.
     */
    int dev_close(struct net_device *dev)
    {
    	if (!(dev->flags & IFF_UP))
    		return 0;
    
    	__dev_close(dev);

	/*
	 * Tell people we are down
	 */
    	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
    	call_netdevice_notifiers(NETDEV_DOWN, dev);
    	return 0;
    }
    
    EXPORT_SYMBOL(dev_close);
    
    /**
     *	dev_disable_lro - disable Large Receive Offload on a device
     *	@dev: device
     *
     *	Disable Large Receive Offload (LRO) on a net device.  Must be
     *	called under RTNL.  This is needed if received packets may be
     *	forwarded to another interface.
     */
    void dev_disable_lro(struct net_device *dev)
    {
    	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
    	    dev->ethtool_ops->set_flags) {
    		u32 flags = dev->ethtool_ops->get_flags(dev);
    		if (flags & ETH_FLAG_LRO) {
    			flags &= ~ETH_FLAG_LRO;
    			dev->ethtool_ops->set_flags(dev, flags);
    		}
    	}
    	WARN_ON(dev->features & NETIF_F_LRO);
    }
    EXPORT_SYMBOL(dev_disable_lro);
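
/*
 * Illustrative usage sketch (an assumption): a setup path that turns a
 * device into a forwarding port disables LRO first, per the comment
 * above, while holding RTNL:
 *
 *	ASSERT_RTNL();
 *	dev_disable_lro(port_dev);
 *
 * "port_dev" is a hypothetical device about to join a bridge or
 * similar forwarding construct.
 */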
    
    /*
     *	Device change register/unregister. These are not inline or static
     *	as we export them to the world.
     */
    
    /**
     *	register_netdevice_notifier - register a network notifier block
     *	@nb: notifier
     *
     *	Register a notifier to be called when network device events occur.
     *	The notifier passed is linked into the kernel structures and must
     *	not be reused until it has been unregistered. A negative errno code
     *	is returned on a failure.
     *
 *	When registered, all registration and up events are replayed
 *	to the new notifier to allow the device to have a race-free
     *	view of the network device list.
     */
    
    int register_netdevice_notifier(struct notifier_block *nb)
    {
    	struct net_device *dev;
	struct net_device *last;
	struct net *net;
    	int err;
    
    	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
    	if (dev_boot_phase)
    		goto unlock;
    	for_each_net(net) {
    		for_each_netdev(net, dev) {
    			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
    			err = notifier_to_errno(err);
    			if (err)
    				goto rollback;
    
    			if (!(dev->flags & IFF_UP))
    				continue;
    			nb->notifier_call(nb, NETDEV_UP, dev);
    		}
    	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
    	for_each_net(net) {
    		for_each_netdev(net, dev) {
    			if (dev == last)
    				break;
    
    			if (dev->flags & IFF_UP) {
    				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
    				nb->notifier_call(nb, NETDEV_DOWN, dev);
    			}
    			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
    }
    
    EXPORT_SYMBOL(register_netdevice_notifier);
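
/*
 * Illustrative sketch (not part of the original file): a minimal
 * notifier as a module might register it. Because registration replays
 * NETDEV_REGISTER and NETDEV_UP for already-existing devices, the
 * callback sees every device exactly once, race-free:
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "example: %s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_notifier = {
 *		.notifier_call	= example_netdev_event,
 *	};
 *
 *	err = register_netdevice_notifier(&example_notifier);
 *
 * In this kernel the notifier's ptr argument is the net_device itself.
 */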
    
    /**
     *	unregister_netdevice_notifier - unregister a network notifier block
     *	@nb: notifier
     *
     *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
     *	kernel structures and may then be reused. A negative errno code
     *	is returned on a failure.
     */
    
    int unregister_netdevice_notifier(struct notifier_block *nb)
    {
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
    	rtnl_unlock();
    	return err;
    }
    
    EXPORT_SYMBOL(unregister_netdevice_notifier);
    /**
     *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

    int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
    }
    
    /* When > 0 there are consumers of rx skb time stamps */
    static atomic_t netstamp_needed = ATOMIC_INIT(0);
    
    void net_enable_timestamp(void)
    {
    	atomic_inc(&netstamp_needed);
    }
    
    EXPORT_SYMBOL(net_enable_timestamp);
    void net_disable_timestamp(void)
    {
    	atomic_dec(&netstamp_needed);
    }
    
    EXPORT_SYMBOL(net_disable_timestamp);
    static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

    /**
     * dev_forward_skb - loopback an skb to another netif
     *
     * @dev: destination network device
     * @skb: buffer to forward
     *
     * return values:
     *	NET_RX_SUCCESS	(no congestion)
     *	NET_RX_DROP     (packet was dropped)
     *
     * dev_forward_skb can be used for injecting an skb from the
     * start_xmit function of one device into the receive queue
     * of another device.
     *
     * The receiving device may be in another namespace, so
     * we have to clear all information in the skb that could
     * impact namespace isolation.
     */
    int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
    {
    	skb_orphan(skb);
    
    	if (!(dev->flags & IFF_UP))
    		return NET_RX_DROP;
    
    	if (skb->len > (dev->mtu + dev->hard_header_len))
    		return NET_RX_DROP;

	skb_set_dev(skb, dev);
    	skb->tstamp.tv64 = 0;
    	skb->pkt_type = PACKET_HOST;
    	skb->protocol = eth_type_trans(skb, dev);
    	return netif_rx(skb);
    }
    EXPORT_SYMBOL_GPL(dev_forward_skb);
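
/*
 * Illustrative sketch (an assumption, modelled on pair devices such as
 * veth): a start_xmit handler injecting the frame into its peer's
 * receive path. example_get_peer() is hypothetical.
 *
 *	static netdev_tx_t example_xmit(struct sk_buff *skb,
 *					struct net_device *dev)
 *	{
 *		struct net_device *peer = example_get_peer(dev);
 *
 *		if (dev_forward_skb(peer, skb) == NET_RX_SUCCESS)
 *			dev->stats.tx_packets++;
 *		return NETDEV_TX_OK;
 *	}
 */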
    
    /*
     *	Support routine. Sends outgoing frames to any network
     *	taps currently in use.
     */
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
    {
    	struct packet_type *ptype;
    
    #ifdef CONFIG_NET_CLS_ACT
    	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
    		net_timestamp(skb);
    #else
	net_timestamp(skb);
#endif

    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    		/* Never send packets back to the socket
    		 * they originated from - MvS (miquels@drinkel.ow.org)
    		 */
    		if ((ptype->dev == dev || !ptype->dev) &&
    		    (ptype->af_packet_priv == NULL ||
    		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
    			if (!skb2)
    				break;
    
    			/* skb->nh should be correctly
    			   set by sender, so that the second statement is
    			   just protection against buggy protocols.
    			 */
			skb_reset_mac_header(skb2);

    			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
    				if (net_ratelimit())
    					printk(KERN_CRIT "protocol %04x is "
    					       "buggy, dev %s\n",
    					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
    			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
    		}
    	}
    	rcu_read_unlock();
    }
    
static inline void __netif_reschedule(struct Qdisc *q)
{
    	struct softnet_data *sd;
    	unsigned long flags;
    
    	local_irq_save(flags);
    	sd = &__get_cpu_var(softnet_data);
    	q->next_sched = sd->output_queue;
    	sd->output_queue = q;
    	raise_softirq_irqoff(NET_TX_SOFTIRQ);
    	local_irq_restore(flags);
    }
    
    void __netif_schedule(struct Qdisc *q)
    {
    	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
    		__netif_reschedule(q);
    }
    EXPORT_SYMBOL(__netif_schedule);
    
void dev_kfree_skb_irq(struct sk_buff *skb)
{
    	if (atomic_dec_and_test(&skb->users)) {
    		struct softnet_data *sd;
    		unsigned long flags;
    
    		local_irq_save(flags);
    		sd = &__get_cpu_var(softnet_data);
    		skb->next = sd->completion_queue;
    		sd->completion_queue = skb;
    		raise_softirq_irqoff(NET_TX_SOFTIRQ);
    		local_irq_restore(flags);
    	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

    void dev_kfree_skb_any(struct sk_buff *skb)
    {
    	if (in_irq() || irqs_disabled())
    		dev_kfree_skb_irq(skb);
    	else
    		dev_kfree_skb(skb);
    }
    EXPORT_SYMBOL(dev_kfree_skb_any);
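
/*
 * Usage note with a sketch (an assumption): TX-completion code that may
 * run in either hard-IRQ or process context frees buffers with:
 *
 *	dev_kfree_skb_any(skb);
 *
 * In hard-IRQ (or IRQs-disabled) context this defers the free to the
 * per-cpu completion queue drained by NET_TX_SOFTIRQ, via
 * dev_kfree_skb_irq(); otherwise it frees immediately.
 */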
    /**
     * netif_device_detach - mark device as removed
     * @dev: network device
     *
     * Mark device as removed from system and therefore no longer available.
     */
    
    void netif_device_detach(struct net_device *dev)
    {
    	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
    	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
    	}
    }
    EXPORT_SYMBOL(netif_device_detach);
    
    
    /**
     * netif_device_attach - mark device as attached
     * @dev: network device
     *
 * Mark device as attached to the system and restart if needed.
     */
    
    void netif_device_attach(struct net_device *dev)
    {
    	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
    	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
    	}
    }
    EXPORT_SYMBOL(netif_device_attach);
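
/*
 * Illustrative sketch (an assumption): drivers typically pair these
 * calls in their power-management hooks; the example_* names below are
 * hypothetical.
 *
 *	static int example_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		return 0;
 *	}
 *
 *	static int example_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */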
    
    
    static bool can_checksum_protocol(unsigned long features, __be16 protocol)
    {
    	return ((features & NETIF_F_GEN_CSUM) ||
    		((features & NETIF_F_IP_CSUM) &&
    		 protocol == htons(ETH_P_IP)) ||
    		((features & NETIF_F_IPV6_CSUM) &&
		 protocol == htons(ETH_P_IPV6)) ||
		((features & NETIF_F_FCOE_CRC) &&
		 protocol == htons(ETH_P_FCOE)));
    }
    
    static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
    {
    	if (can_checksum_protocol(dev->features, skb->protocol))
    		return true;
    
    	if (skb->protocol == htons(ETH_P_8021Q)) {
    		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
    		if (can_checksum_protocol(dev->features & dev->vlan_features,
    					  veh->h_vlan_encapsulated_proto))
    			return true;
    	}
    
    	return false;
    }
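
/*
 * Worked example (not part of the original file): for a VLAN-tagged
 * IPv4 frame skb->protocol is htons(ETH_P_8021Q), so the first
 * can_checksum_protocol() test passes only with NETIF_F_GEN_CSUM.
 * Otherwise the encapsulated protocol (ETH_P_IP here) is re-checked
 * against dev->features & dev->vlan_features, i.e. checksum offload is
 * used only if the device also advertises it for VLAN traffic.
 */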
    
    /**
 * skb_set_dev - assign a new device to a buffer
     * @skb: buffer for the new device
     * @dev: network device
     *
     * If an skb is owned by a device already, we have to reset
     * all data private to the namespace a device belongs to
     * before assigning it a new device.
     */
    #ifdef CONFIG_NET_NS
    void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
    {
    	skb_dst_drop(skb);
    	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
    		secpath_reset(skb);
    		nf_reset(skb);
    		skb_init_secmark(skb);
    		skb->mark = 0;
    		skb->priority = 0;
    		skb->nf_trace = 0;
    		skb->ipvs_property = 0;
    #ifdef CONFIG_NET_SCHED
    		skb->tc_index = 0;
    #endif
    	}
    	skb->dev = dev;
    }
    EXPORT_SYMBOL(skb_set_dev);
    #endif /* CONFIG_NET_NS */
    
    /*
     * Invalidate hardware checksum when packet is to be mangled, and
     * complete checksum manually on outgoing path.
     */
    
    int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

    	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

    	offset = skb->csum_start - skb_headroom(skb);
    	BUG_ON(offset >= skb_headlen(skb));
    	csum = skb_checksum(skb, offset, skb->len - offset, 0);
    
    	offset += skb->csum_offset;
    	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
    
    	if (skb_cloned(skb) &&
    	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
    		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
    		if (ret)
    			goto out;
    	}
    
	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
    	skb->ip_summed = CHECKSUM_NONE;
out:
    	return ret;
    }
    
    EXPORT_SYMBOL(skb_checksum_help);
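
/*
 * Illustrative usage sketch (an assumption): code about to modify a
 * packet whose checksum the hardware was expected to fill in resolves
 * the CHECKSUM_PARTIAL state first:
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    skb_checksum_help(skb))
 *		goto drop;
 *
 * On success skb->ip_summed becomes CHECKSUM_NONE and the checksum
 * field inside the packet is valid.
 */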
    /**
     *	skb_gso_segment - Perform segmentation on skb.
     *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
    struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

    	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

    	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
    		struct net_device *dev = skb->dev;
    		struct ethtool_drvinfo info = {};
    
    		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
    			dev->ethtool_ops->get_drvinfo(dev, &info);
    
    		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
    			"ip_summed=%d",
    		     info.driver, dev ? dev->features : 0L,
    		     skb->sk ? skb->sk->sk_route_caps : 0L,
    		     skb->len, skb->data_len, skb->ip_summed);
    
    
    		if (skb_header_cloned(skb) &&
    		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
    			return ERR_PTR(err);
    	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
    		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

    	__skb_push(skb, skb->data - skb_mac_header(skb));
    
    	return segs;
    }
    EXPORT_SYMBOL(skb_gso_segment);
    
    
    /* Take action when hardware reception checksum errors are detected. */
    #ifdef CONFIG_BUG
    void netdev_rx_csum_fault(struct net_device *dev)
    {
    	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
			dev ? dev->name : "<unknown>");
    		dump_stack();
    	}
    }
    EXPORT_SYMBOL(netdev_rx_csum_fault);
    #endif
    
    /* Actually, we should eliminate this check as soon as we know, that:
     * 1. IOMMU is present and allows to map all the memory.
     * 2. No high memory really exists on this machine.
     */
    
    static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
    {
#ifdef CONFIG_HIGHMEM
	int i;
    
    	if (dev->features & NETIF_F_HIGHDMA)
    		return 0;
    
    	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
    		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
    			return 1;

#endif
    	return 0;
    }
    
    
    struct dev_gso_cb {
    	void (*destructor)(struct sk_buff *skb);
    };
    
    #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
    
    static void dev_gso_skb_destructor(struct sk_buff *skb)
    {
    	struct dev_gso_cb *cb;
    
    	do {
    		struct sk_buff *nskb = skb->next;
    
    		skb->next = nskb->next;
    		nskb->next = NULL;
    		kfree_skb(nskb);
    	} while (skb->next);
    
    	cb = DEV_GSO_CB(skb);
    	if (cb->destructor)
    		cb->destructor(skb);
    }
    
    /**
     *	dev_gso_segment - Perform emulated hardware segmentation on skb.
     *	@skb: buffer to segment
     *
     *	This function segments the given skb and stores the list of segments
     *	in skb->next.
     */
    static int dev_gso_segment(struct sk_buff *skb)
    {
    	struct net_device *dev = skb->dev;
    	struct sk_buff *segs;
    
    	int features = dev->features & ~(illegal_highdma(dev, skb) ?
    					 NETIF_F_SG : 0);
    
    	segs = skb_gso_segment(skb, features);
    
    	/* Verifying header integrity only. */
    	if (!segs)
    		return 0;
    
    	if (IS_ERR(segs))
		return PTR_ERR(segs);
    
    	skb->next = segs;
    	DEV_GSO_CB(skb)->destructor = skb->destructor;
    	skb->destructor = dev_gso_skb_destructor;
    
    	return 0;
    }
    
    
    int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
    			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;

    	if (likely(!skb->next)) {
		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

    		if (netif_needs_gso(dev, skb)) {
    			if (unlikely(dev_gso_segment(skb)))
    				goto out_kfree_skb;
    			if (skb->next)
    				goto gso;
    		}
    
    		/*
		 * If device doesn't need skb->dst, release it right now while
		 * it's hot in this cpu cache
    		 */
    		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
    			skb_dst_drop(skb);

		rc = ops->ndo_start_xmit(skb, dev);
		if (rc == NETDEV_TX_OK)
    			txq_trans_update(txq);
    
    		/*
    		 * TODO: if skb_orphan() was called by
    		 * dev->hard_start_xmit() (for example, the unmodified
    		 * igb driver does that; bnx2 doesn't), then
    		 * skb_tx_software_timestamp() will be unable to send
    		 * back the time stamp.
    		 *
    		 * How can this be prevented? Always create another
    		 * reference to the socket before calling
    		 * dev->hard_start_xmit()? Prevent that skb_orphan()
    		 * does anything in dev->hard_start_xmit() by clearing
    		 * the skb destructor before the call and restoring it
    		 * afterwards, then doing the skb_orphan() ourselves?
    		 */
    		return rc;
	}

gso:
    	do {
    		struct sk_buff *nskb = skb->next;
    
    		skb->next = nskb->next;
    		nskb->next = NULL;
    
    
    		/*
		 * If device doesn't need nskb->dst, release it right now while
		 * it's hot in this cpu cache
    		 */
    		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
    			skb_dst_drop(nskb);

		rc = ops->ndo_start_xmit(nskb, dev);
		if (unlikely(rc != NETDEV_TX_OK)) {
    			if (rc & ~NETDEV_TX_MASK)
    				goto out_kfree_gso_skb;
			nskb->next = skb->next;
    			skb->next = nskb;
    			return rc;
    		}
		txq_trans_update(txq);
		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
    	} while (skb->next);
    
    out_kfree_gso_skb:
    	if (likely(skb->next == NULL))
    		skb->destructor = DEV_GSO_CB(skb)->destructor;
    out_kfree_skb:
    	kfree_skb(skb);
	return rc;
}

    static u32 hashrnd __read_mostly;
    
    u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
	u32 hash;

    	if (skb_rx_queue_recorded(skb)) {
    		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= dev->real_num_tx_queues))
    			hash -= dev->real_num_tx_queues;
    		return hash;
    	}
    
	if (skb->sk && skb->sk->sk_hash)
		hash = skb->sk->sk_hash;
	else
		hash = skb->protocol;

	hash = jhash_1word(hash, hashrnd);

    	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
    EXPORT_SYMBOL(skb_tx_hash);
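
/*
 * Worked example (not part of the original file): the return statement
 * maps a 32-bit hash onto the queue range by scaling instead of a
 * modulo. With real_num_tx_queues == 4 and hash == 0x80000000:
 *
 *	(u16) (((u64) 0x80000000 * 4) >> 32) == 2
 *
 * so uniform hashes spread near-uniformly across the queues while
 * avoiding a division/modulo on the fast path.
 */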
    
    static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
    {
    	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
    		if (net_ratelimit()) {
			netdev_warn(dev, "selects TX queue %d, but "
				    "real number of TX queues is %d\n",
				    queue_index, dev->real_num_tx_queues);
		}
		return 0;
	}
	return queue_index;
}

    static struct netdev_queue *dev_pick_tx(struct net_device *dev,
    					struct sk_buff *skb)
    {
	u16 queue_index;
	struct sock *sk = skb->sk;

    	if (sk_tx_queue_recorded(sk)) {
    		queue_index = sk_tx_queue_get(sk);
    	} else {
    		const struct net_device_ops *ops = dev->netdev_ops;
    
    		if (ops->ndo_select_queue) {
    			queue_index = ops->ndo_select_queue(dev, skb);
			queue_index = dev_cap_txqueue(dev, queue_index);
    		} else {
    			queue_index = 0;
    			if (dev->real_num_tx_queues > 1)
    				queue_index = skb_tx_hash(dev, skb);
    
    			if (sk && sk->sk_dst_cache)
    				sk_tx_queue_set(sk, queue_index);
    		}
    	}
    
    	skb_set_queue_mapping(skb, queue_index);
    	return netdev_get_tx_queue(dev, queue_index);
}

    static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
    				 struct net_device *dev,