EXPORT_SYMBOL(dev_alloc_name);
    
    static int dev_alloc_name_ns(struct net *net,
    			     struct net_device *dev,
			     const char *name)
{
    	char buf[IFNAMSIZ];
    	int ret;
    
    	ret = __dev_alloc_name(net, name, buf);
    	if (ret >= 0)
    		strlcpy(dev->name, buf, IFNAMSIZ);
    	return ret;
    }
    
    static int dev_get_valid_name(struct net *net,
    			      struct net_device *dev,
    			      const char *name)
    {
    	BUG_ON(!net);
    
    	if (!dev_valid_name(name))
    		return -EINVAL;
    
    
    	if (strchr(name, '%'))
    
    		return dev_alloc_name_ns(net, dev, name);
    
    	else if (__dev_get_by_name(net, name))
    		return -EEXIST;
    
    	else if (dev->name != name)
    		strlcpy(dev->name, name, IFNAMSIZ);
    
	return 0;
}
    
    /**
     *	dev_change_name - change name of a device
     *	@dev: device
     *	@newname: name (or format string) must be at least IFNAMSIZ
     *
 *	Change the name of a device. A format string such as "eth%d" can
 *	be passed for wildcarding.
     */
    
    int dev_change_name(struct net_device *dev, const char *newname)
    
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
    }
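
/*
 * Illustrative sketch (not part of the original dev.c): how kernel code
 * might rename a device with dev_change_name().  The names "eth0" and
 * "lan%d" are hypothetical.  The caller must hold RTNL and the device
 * must be down, as enforced by ASSERT_RTNL() and the IFF_UP check above.
 */
#if 0	/* example only */
static int example_rename(struct net *net)
{
	struct net_device *dev;
	int err;

	rtnl_lock();
	dev = __dev_get_by_name(net, "eth0");	/* hypothetical device */
	if (!dev) {
		rtnl_unlock();
		return -ENODEV;
	}
	/* "%d" is expanded to the first free unit number, e.g. "lan0" */
	err = dev_change_name(dev, "lan%d");
	rtnl_unlock();
	return err;
}
#endif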
    
    
    /**
     *	dev_set_alias - change ifalias of a device
     *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from @alias
 *
 *	Set the ifalias (interface description) for a device.
 */
    int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
    {
    
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;
    
    
    	strlcpy(dev->ifalias, alias, len+1);
    	return len;
    }
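
/*
 * Illustrative sketch (not part of the original dev.c): setting an
 * interface alias with dev_set_alias().  The alias string is hypothetical;
 * in-tree the caller is the rtnetlink IFLA_IFALIAS handler, which already
 * holds RTNL.
 */
#if 0	/* example only */
static int example_set_alias(struct net_device *dev)
{
	static const char desc[] = "uplink to core switch";	/* hypothetical */
	int ret;

	ASSERT_RTNL();
	/* dev_set_alias() returns the copied length on success */
	ret = dev_set_alias(dev, desc, strlen(desc));
	return ret < 0 ? ret : 0;
}
#endif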
    
    
    
/**
 *	netdev_features_change - device changes features
    
     *	@dev: device to cause notification
     *
     *	Called to indicate a device has changed features.
     */
    void netdev_features_change(struct net_device *dev)
    {
    
    	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
    
    }
    EXPORT_SYMBOL(netdev_features_change);
    
    
    /**
     *	netdev_state_change - device changes state
     *	@dev: device to cause notification
     *
     *	Called to indicate a device has changed state. This function calls
     *	the notifier chains for netdev_chain and sends a NEWLINK message
     *	to the routing socket.
     */
    void netdev_state_change(struct net_device *dev)
    {
    	if (dev->flags & IFF_UP) {
    
    		call_netdevice_notifiers(NETDEV_CHANGE, dev);
    
    		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
    	}
    }
    
    EXPORT_SYMBOL(netdev_state_change);
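
/*
 * Illustrative sketch (not part of the original dev.c): a driver letting
 * the stack know that its feature set and state changed.  The trigger (a
 * hypothetical "after reconfiguration" hook) is made up for illustration.
 */
#if 0	/* example only */
static void example_after_reconfig(struct net_device *dev)
{
	ASSERT_RTNL();

	/* tell interested listeners (ethtool, bonding, ...) that the
	 * advertised feature set changed */
	netdev_features_change(dev);

	/* and that device state changed; this also emits RTM_NEWLINK
	 * to the routing socket if the device is up */
	netdev_state_change(dev);
}
#endif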
    
    
    
    /**
     * 	netdev_notify_peers - notify network peers about existence of @dev
     * 	@dev: network device
     *
     * Generate traffic such that interested network peers are aware of
     * @dev, such as by generating a gratuitous ARP. This may be used when
     * a device wants to inform the rest of the network about some sort of
     * reconfiguration such as a failover event or virtual machine
     * migration.
     */
void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
    EXPORT_SYMBOL(netdev_notify_peers);
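
/*
 * Illustrative sketch (not part of the original dev.c): a virtualisation or
 * failover driver announcing itself after a migration event.  The event
 * handler name is hypothetical.  Note that netdev_notify_peers() takes and
 * releases RTNL itself, so the caller must not already hold it.
 */
#if 0	/* example only */
static void example_after_migration(struct net_device *dev)
{
	if (netif_running(dev))
		netdev_notify_peers(dev);
}
#endif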
    
    static int __dev_open(struct net_device *dev)
    
    {
    
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

    	if (!netif_device_present(dev))
    		return -ENODEV;
    
    
    	/* Block netpoll from trying to do any rx path servicing.
    	 * If we don't do this there is a chance ndo_poll_controller
    	 * or ndo_poll may be running while we open the device
    	 */
    	ret = netpoll_rx_disable(dev);
    	if (ret)
    		return ret;
    
    
    	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
    	ret = notifier_to_errno(ret);
    	if (ret)
    		return ret;
    
    
    	set_bit(__LINK_STATE_START, &dev->state);
    
    	if (ops->ndo_validate_addr)
    		ret = ops->ndo_validate_addr(dev);
    
    	if (!ret && ops->ndo_open)
    		ret = ops->ndo_open(dev);
    
	netpoll_rx_enable(dev);

    	if (ret)
    		clear_bit(__LINK_STATE_START, &dev->state);
    	else {
    
    		dev->flags |= IFF_UP;
    
    		dev_set_rx_mode(dev);
    
    		dev_activate(dev);
    
    		add_device_randomness(dev->dev_addr, dev->addr_len);
    
    	}
    
    	return ret;
    }
    
    /**
    
     *	dev_open	- prepare an interface for use.
     *	@dev:	device to open
    
     *
    
     *	Takes a device from down to up state. The device's private open
     *	function is invoked and then the multicast lists are loaded. Finally
     *	the device is moved into the up state and a %NETDEV_UP message is
     *	sent to the netdev notifier chain.
     *
     *	Calling this function on an active interface is a nop. On a failure
     *	a negative errno code is returned.
    
     */
    
    int dev_open(struct net_device *dev)
    {
    	int ret;
    
    	if (dev->flags & IFF_UP)
    		return 0;
    
    	ret = __dev_open(dev);
    	if (ret < 0)
    		return ret;
    
    	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
    	call_netdevice_notifiers(NETDEV_UP, dev);
    
    	return ret;
    }
    EXPORT_SYMBOL(dev_open);
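
/*
 * Illustrative sketch (not part of the original dev.c): bringing a device
 * up and later down from kernel code.  Both dev_open() and dev_close()
 * must run under RTNL; the device name is hypothetical.
 */
#if 0	/* example only */
static int example_cycle_device(struct net *net)
{
	struct net_device *dev;
	int err;

	rtnl_lock();
	dev = __dev_get_by_name(net, "dummy0");	/* hypothetical device */
	if (!dev) {
		rtnl_unlock();
		return -ENODEV;
	}

	err = dev_open(dev);		/* nop if already IFF_UP */
	if (!err)
		dev_close(dev);		/* nop if not IFF_UP */

	rtnl_unlock();
	return err;
}
#endif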
    
    
    static int __dev_close_many(struct list_head *head)
    
{
	struct net_device *dev;

    	list_for_each_entry(dev, head, unreg_list) {
    		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
    
    
    
    		clear_bit(__LINK_STATE_START, &dev->state);
    
    
    
    		/* Synchronize to scheduled poll. We cannot touch poll list, it
    		 * can be even on different cpu. So just clear netif_running().
    		 *
    		 * dev->stop() will invoke napi_disable() on all of it's
    		 * napi_struct instances on this device.
    		 */
    		smp_mb__after_clear_bit(); /* Commit netif_running(). */
    	}
    
	dev_deactivate_many(head);

    	list_for_each_entry(dev, head, unreg_list) {
    		const struct net_device_ops *ops = dev->netdev_ops;
    
    
    
    		/*
    		 *	Call the device specific close. This cannot fail.
    		 *	Only if device is UP
    		 *
    		 *	We allow it to be called even after a DETACH hot-plug
    		 *	event.
    		 */
    		if (ops->ndo_stop)
    			ops->ndo_stop(dev);
    
    		dev->flags &= ~IFF_UP;
    		net_dmaengine_put();
    	}
    
    	return 0;
    }
    
    static int __dev_close(struct net_device *dev)
{
	int retval;
	LIST_HEAD(single);

    	/* Temporarily disable netpoll until the interface is down */
    	retval = netpoll_rx_disable(dev);
    	if (retval)
    		return retval;
    
    
    	list_add(&dev->unreg_list, &single);
    
    	retval = __dev_close_many(&single);
	list_del(&single);

	netpoll_rx_enable(dev);
	return retval;
}

    static int dev_close_many(struct list_head *head)
    
    {
    	struct net_device *dev, *tmp;
    	LIST_HEAD(tmp_list);
    
    
    
    	list_for_each_entry_safe(dev, tmp, head, unreg_list)
    		if (!(dev->flags & IFF_UP))
    			list_move(&dev->unreg_list, &tmp_list);
    
    	__dev_close_many(head);
    
    
    
    	list_for_each_entry(dev, head, unreg_list) {
    		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
    		call_netdevice_notifiers(NETDEV_DOWN, dev);
    	}
    
    	/* rollback_registered_many needs the complete original list */
    	list_splice(&tmp_list, head);
    
    	return 0;
    }
    
    /**
     *	dev_close - shutdown an interface.
     *	@dev: device to shutdown
     *
     *	This function moves an active device into down state. A
     *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
     *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
     *	chain.
     */
    int dev_close(struct net_device *dev)
    {
    
	int ret = 0;

	if (dev->flags & IFF_UP) {
    		LIST_HEAD(single);
    
    
    
    		/* Block netpoll rx while the interface is going down */
    		ret = netpoll_rx_disable(dev);
    		if (ret)
    			return ret;
    
    
    		list_add(&dev->unreg_list, &single);
    		dev_close_many(&single);
    		list_del(&single);
    
		netpoll_rx_enable(dev);
	}
	return ret;
}
    
    EXPORT_SYMBOL(dev_close);
    
    /**
     *	dev_disable_lro - disable Large Receive Offload on a device
     *	@dev: device
     *
     *	Disable Large Receive Offload (LRO) on a net device.  Must be
     *	called under RTNL.  This is needed if received packets may be
     *	forwarded to another interface.
     */
    void dev_disable_lro(struct net_device *dev)
    {
    
    	/*
    	 * If we're trying to disable lro on a vlan device
    	 * use the underlying physical device instead
    	 */
    	if (is_vlan_dev(dev))
    		dev = vlan_dev_real_dev(dev);
    
    
    	dev->wanted_features &= ~NETIF_F_LRO;
    	netdev_update_features(dev);
    
    	if (unlikely(dev->features & NETIF_F_LRO))
    		netdev_WARN(dev, "failed to disable LRO!\n");
    
}
EXPORT_SYMBOL(dev_disable_lro);

    /**
     *	register_netdevice_notifier - register a network notifier block
     *	@nb: notifier
     *
     *	Register a notifier to be called when network device events occur.
     *	The notifier passed is linked into the kernel structures and must
     *	not be reused until it has been unregistered. A negative errno code
     *	is returned on a failure.
     *
 * 	When registered, all registration and up events are replayed
 *	to the new notifier so that it has a race-free view of the
 *	network device list.
     */
    
    int register_netdevice_notifier(struct notifier_block *nb)
    {
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
    
    EXPORT_SYMBOL(register_netdevice_notifier);
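
/*
 * Illustrative sketch (not part of the original dev.c): a minimal notifier
 * that logs device registration and link-down events.  In this kernel the
 * notifier's third argument is the struct net_device itself (see
 * call_netdevice_notifiers() below); later kernels pass a wrapper struct
 * instead.  The module init/exit pairing is hypothetical.
 */
#if 0	/* example only */
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_REGISTER:
		pr_info("example: %s registered\n", dev->name);
		break;
	case NETDEV_DOWN:
		pr_info("example: %s went down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_nb = {
	.notifier_call = example_netdev_event,
};

static int __init example_init(void)
{
	/* existing REGISTER/UP events are replayed to us at this point */
	return register_netdevice_notifier(&example_netdev_nb);
}

static void __exit example_exit(void)
{
	unregister_netdevice_notifier(&example_netdev_nb);
}
#endif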
    
    
    /**
     *	unregister_netdevice_notifier - unregister a network notifier block
     *	@nb: notifier
     *
     *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
     *	kernel structures and may then be reused. A negative errno code
     *	is returned on a failure.
    
     *
 * 	After unregistering, unregister and down device events are synthesized
     *	for all devices on the device list to the removed notifier to remove
     *	the need for special case cleanup code.
    
     */
    
    int unregister_netdevice_notifier(struct notifier_block *nb)
    {
    
	struct net_device *dev;
	struct net *net;
	int err;

	rtnl_lock();
    	err = raw_notifier_chain_unregister(&netdev_chain, nb);
    
    	if (err)
    		goto unlock;
    
    	for_each_net(net) {
    		for_each_netdev(net, dev) {
    			if (dev->flags & IFF_UP) {
    				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
    				nb->notifier_call(nb, NETDEV_DOWN, dev);
    			}
    			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
    		}
    	}
    unlock:
    
    	rtnl_unlock();
    	return err;
    
    }
    
    EXPORT_SYMBOL(unregister_netdevice_notifier);
    
    
    /**
     *	call_netdevice_notifiers - call all network notifier blocks
     *      @val: value passed unmodified to notifier function
    
     *      @dev: net_device pointer passed unmodified to notifier function
    
     *
     *	Call all network notifier blocks.  Parameters and return value
    
 *	are as for raw_notifier_call_chain().
 */

    int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
    
    {
    
    	return raw_notifier_call_chain(&netdev_chain, val, dev);
    
    }
    
    EXPORT_SYMBOL(call_netdevice_notifiers);
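
/*
 * Illustrative sketch (not part of the original dev.c): the usual pattern
 * for a vetoable event -- announce it through the chain, then convert the
 * notifier return code into an errno.  The event choice (NETDEV_PRE_UP)
 * mirrors __dev_open() earlier in this file; the helper name is made up.
 */
#if 0	/* example only */
static int example_announce_pre_up(struct net_device *dev)
{
	int ret;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	return notifier_to_errno(ret);	/* 0 or the first negative errno */
}
#endif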
    
    
    
    static struct static_key netstamp_needed __read_mostly;
    
    #ifdef HAVE_JUMP_LABEL
    
    /* We are not allowed to call static_key_slow_dec() from irq context
    
 * If net_disable_timestamp() is called from irq context, defer the
 * static_key_slow_dec() calls.
     */
    static atomic_t netstamp_needed_deferred;
    #endif
    
    
    void net_enable_timestamp(void)
    {
    
    #ifdef HAVE_JUMP_LABEL
    	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
    
    	if (deferred) {
    		while (--deferred)
    
    			static_key_slow_dec(&netstamp_needed);
    
    		return;
    	}
    #endif
    	WARN_ON(in_interrupt());
    
    	static_key_slow_inc(&netstamp_needed);
    
    }
    
    EXPORT_SYMBOL(net_enable_timestamp);
    
    
    void net_disable_timestamp(void)
    {
    
    #ifdef HAVE_JUMP_LABEL
    	if (in_interrupt()) {
    		atomic_inc(&netstamp_needed_deferred);
    		return;
    	}
    #endif
    
    	static_key_slow_dec(&netstamp_needed);
    
    }
    
    EXPORT_SYMBOL(net_disable_timestamp);
    
    
    
    static inline void net_timestamp_set(struct sk_buff *skb)
    
    {
    
    	skb->tstamp.tv64 = 0;
    
	if (static_key_false(&netstamp_needed))
		__net_timestamp(skb);
}

    #define net_timestamp_check(COND, SKB)			\
    
    	if (static_key_false(&netstamp_needed)) {		\
    
    		if ((COND) && !(SKB)->tstamp.tv64)	\
    			__net_timestamp(SKB);		\
    	}						\
    
    static inline bool is_skb_forwardable(struct net_device *dev,
    				      struct sk_buff *skb)
    {
    	unsigned int len;
    
    	if (!(dev->flags & IFF_UP))
    		return false;
    
    	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
    	if (skb->len <= len)
    		return true;
    
    	/* if TSO is enabled, we don't care about the length as the packet
    	 * could be forwarded without being segmented before
    	 */
    	if (skb_is_gso(skb))
    		return true;
    
    	return false;
    }
    
    
    /**
     * dev_forward_skb - loopback an skb to another netif
     *
     * @dev: destination network device
     * @skb: buffer to forward
     *
     * return values:
     *	NET_RX_SUCCESS	(no congestion)
    
     *	NET_RX_DROP     (packet was dropped, but freed)
    
     *
     * dev_forward_skb can be used for injecting an skb from the
     * start_xmit function of one device into the receive queue
     * of another device.
     *
     * The receiving device may be in another namespace, so
     * we have to clear all information in the skb that could
     * impact namespace isolation.
     */
    int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
    {
    
    	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
    		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
    			atomic_long_inc(&dev->rx_dropped);
    			kfree_skb(skb);
    			return NET_RX_DROP;
    		}
    	}
    
    
    	nf_reset(skb);
    
    	if (unlikely(!is_skb_forwardable(dev, skb))) {
    
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}

    	skb->tstamp.tv64 = 0;
    	skb->pkt_type = PACKET_HOST;
    	skb->protocol = eth_type_trans(skb, dev);
    
    	skb->mark = 0;
    	secpath_reset(skb);
    	nf_reset(skb);
    
    	return netif_rx(skb);
    }
    EXPORT_SYMBOL_GPL(dev_forward_skb);
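
/*
 * Illustrative sketch (not part of the original dev.c): a veth-style
 * virtual driver handing a transmitted skb to its peer's receive path via
 * dev_forward_skb().  The private structure and peer lookup are
 * hypothetical.
 */
#if 0	/* example only */
struct example_priv {
	struct net_device *peer;	/* hypothetical peer device */
};

static netdev_tx_t example_start_xmit(struct sk_buff *skb,
				      struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);
	unsigned int len = skb->len;	/* skb is consumed below */

	/* dev_forward_skb() scrubs the skb and queues it with netif_rx();
	 * it consumes the skb in both the success and drop cases.
	 */
	if (dev_forward_skb(priv->peer, skb) == NET_RX_SUCCESS) {
		dev->stats.tx_packets++;
		dev->stats.tx_bytes += len;
	}
	return NETDEV_TX_OK;
}
#endif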
    
    
    static inline int deliver_skb(struct sk_buff *skb,
    			      struct packet_type *pt_prev,
    			      struct net_device *orig_dev)
    {
    
    	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
    		return -ENOMEM;
    
    	atomic_inc(&skb->users);
    	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
    }
    
    
    static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
    {
    
    	if (!ptype->af_packet_priv || !skb->sk)
    
    		return false;
    
    	if (ptype->id_match)
    		return ptype->id_match(ptype, skb->sk);
    	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
    		return true;
    
    	return false;
    }
    
    
    /*
     *	Support routine. Sends outgoing frames to any network
     *	taps currently in use.
     */
    
    
    static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
    
    {
    	struct packet_type *ptype;
    
    	struct sk_buff *skb2 = NULL;
    	struct packet_type *pt_prev = NULL;
    
    	rcu_read_lock();
    	list_for_each_entry_rcu(ptype, &ptype_all, list) {
    		/* Never send packets back to the socket
    		 * they originated from - MvS (miquels@drinkel.ow.org)
    		 */
    		if ((ptype->dev == dev || !ptype->dev) &&
    
    		    (!skb_loop_sk(ptype, skb))) {
    
    			if (pt_prev) {
    				deliver_skb(skb2, pt_prev, skb->dev);
    				pt_prev = ptype;
    				continue;
    			}
    
    			skb2 = skb_clone(skb, GFP_ATOMIC);
    
    			if (!skb2)
    				break;
    
    
    			/* skb->nh should be correctly
    			   set by sender, so that the second statement is
    			   just protection against buggy protocols.
    			 */
    
    			skb_reset_mac_header(skb2);
    
    
    
    			if (skb_network_header(skb2) < skb2->data ||
    
    			    skb2->network_header > skb2->tail) {
    
    				net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
    						     ntohs(skb2->protocol),
    						     dev->name);
    
				skb_reset_network_header(skb2);
			}

    			skb2->transport_header = skb2->network_header;
    
    			skb2->pkt_type = PACKET_OUTGOING;
    
			pt_prev = ptype;
    		}
    	}
    
    	if (pt_prev)
    		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
    
    	rcu_read_unlock();
    }
    
    
    /**
     * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
    
     * @dev: Network device
     * @txq: number of queues available
     *
 * If real_num_tx_queues is changed the tc mappings may no longer be
 * valid. To resolve this verify the tc mapping remains valid and if
 * not, reset the mapping to TC0. Once no priorities map to an
 * offset/count pair it will no longer be used. In the worst case, if
 * TC0 itself is invalid, nothing can be done, so priority mappings are
 * disabled entirely. It is expected that drivers will fix this mapping
 * if they can before calling netif_set_real_num_tx_queues.
     */
    
    static void netif_setup_tc(struct net_device *dev, unsigned int txq)
    
    {
    	int i;
    	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
    
    	/* If TC0 is invalidated disable TC mapping */
    	if (tc->offset + tc->count > txq) {
    
    		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
    
    		dev->num_tc = 0;
    		return;
    	}
    
    	/* Invalidated prio to tc mappings set to TC0 */
    	for (i = 1; i < TC_BITMASK + 1; i++) {
    		int q = netdev_get_prio_tc_map(dev, i);
    
    		tc = &dev->tc_to_txq[q];
    		if (tc->offset + tc->count > txq) {
    
    			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
    				i, q);
    
    			netdev_set_prio_tc_map(dev, i, 0);
    		}
    	}
    }
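
/*
 * Illustrative sketch (not part of the original dev.c): how a multiqueue
 * driver might set up the prio->tc->txq mappings that netif_setup_tc()
 * later keeps consistent when real_num_tx_queues shrinks.  The queue
 * layout (8 queues split into two classes) is hypothetical and assumes
 * the device was allocated with at least 8 tx queues.
 */
#if 0	/* example only */
static int example_setup_tc(struct net_device *dev)
{
	int prio;

	netdev_set_num_tc(dev, 2);		/* two traffic classes */
	netdev_set_tc_queue(dev, 0, 4, 0);	/* TC0: queues 0-3 */
	netdev_set_tc_queue(dev, 1, 4, 4);	/* TC1: queues 4-7 */

	/* map priorities 0-7 to TC0 and 8-15 to TC1 */
	for (prio = 0; prio <= TC_BITMASK; prio++)
		netdev_set_prio_tc_map(dev, prio, prio < 8 ? 0 : 1);

	/* if this later drops below 8, netif_setup_tc() resets any
	 * mapping that points past the new queue count */
	return netif_set_real_num_tx_queues(dev, 8);
}
#endif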
    
    
    #ifdef CONFIG_XPS
    static DEFINE_MUTEX(xps_map_mutex);
    #define xmap_dereference(P)		\
    	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
    
    
    static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
					int cpu, u16 index)
{
	struct xps_map *map = NULL;
	int pos;

    	if (dev_maps)
    		map = xmap_dereference(dev_maps->cpu_map[cpu]);
    
    	for (pos = 0; map && pos < map->len; pos++) {
    		if (map->queues[pos] == index) {
    
    			if (map->len > 1) {
    				map->queues[pos] = map->queues[--map->len];
    			} else {
    
				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
				kfree_rcu(map, rcu);
				map = NULL;
			}
			break;
		}
	}

	return map;
}

static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
{
	struct xps_dev_maps *dev_maps;
	int cpu, i;
	bool active = false;
    
    	mutex_lock(&xps_map_mutex);
    	dev_maps = xmap_dereference(dev->xps_maps);
    
    	if (!dev_maps)
    		goto out_no_maps;
    
    	for_each_possible_cpu(cpu) {
    
    		for (i = index; i < dev->num_tx_queues; i++) {
    			if (!remove_xps_queue(dev_maps, cpu, i))
    				break;
    		}
		if (i == dev->num_tx_queues)
			active = true;
	}

	if (!active) {
    		RCU_INIT_POINTER(dev->xps_maps, NULL);
    		kfree_rcu(dev_maps, rcu);
    	}
    
    
    	for (i = index; i < dev->num_tx_queues; i++)
    		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
					     NUMA_NO_NODE);

out_no_maps:
	mutex_unlock(&xps_map_mutex);
}
    
    static struct xps_map *expand_xps_map(struct xps_map *map,
    				      int cpu, u16 index)
    {
    	struct xps_map *new_map;
    	int alloc_len = XPS_MIN_MAP_ALLOC;
    	int i, pos;
    
    	for (pos = 0; map && pos < map->len; pos++) {
    		if (map->queues[pos] != index)
    			continue;
    		return map;
    	}
    
    	/* Need to add queue to this CPU's existing map */
    	if (map) {
    		if (pos < map->alloc_len)
    			return map;
    
    		alloc_len = map->alloc_len * 2;
    	}
    
    	/* Need to allocate new map to store queue on this CPU's map */
    	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
    			       cpu_to_node(cpu));
    	if (!new_map)
    		return NULL;
    
    	for (i = 0; i < pos; i++)
    		new_map->queues[i] = map->queues[i];
    	new_map->alloc_len = alloc_len;
    	new_map->len = pos;
    
    	return new_map;
    }
    
    
    int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
    {
    
    	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
    
    	struct xps_map *map, *new_map;
    	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
    
    	int cpu, numa_node_id = -2;
    	bool active = false;
    
    
    	mutex_lock(&xps_map_mutex);
    
    	dev_maps = xmap_dereference(dev->xps_maps);
    
    
    	/* allocate memory for queue storage */
    	for_each_online_cpu(cpu) {
    		if (!cpumask_test_cpu(cpu, mask))
    			continue;
    
    		if (!new_dev_maps)
    			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
    
    		if (!new_dev_maps) {
			mutex_unlock(&xps_map_mutex);
			return -ENOMEM;
		}

    
    		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
    				 NULL;
    
    		map = expand_xps_map(map, cpu, index);
    		if (!map)
    			goto error;
    
    		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
    	}
    
    	if (!new_dev_maps)
    		goto out_no_new_maps;
    
    
	for_each_possible_cpu(cpu) {
		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
    			/* add queue to CPU maps */
    			int pos = 0;
    
    			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
    			while ((pos < map->len) && (map->queues[pos] != index))
    				pos++;
    
    			if (pos == map->len)
    				map->queues[map->len++] = index;
    
    #ifdef CONFIG_NUMA
    			if (numa_node_id == -2)
    				numa_node_id = cpu_to_node(cpu);
    			else if (numa_node_id != cpu_to_node(cpu))
    				numa_node_id = -1;
    #endif
    
    		} else if (dev_maps) {
    			/* fill in the new device map from the old device map */
    			map = xmap_dereference(dev_maps->cpu_map[cpu]);
			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
		}
	}

    	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
    
    
    	if (dev_maps) {
    		for_each_possible_cpu(cpu) {
    			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
    			map = xmap_dereference(dev_maps->cpu_map[cpu]);
    			if (map && map != new_map)
    				kfree_rcu(map, rcu);
		}

		kfree_rcu(dev_maps, rcu);
	}

    	dev_maps = new_dev_maps;
    	active = true;
    
    out_no_new_maps:
    	/* update Tx queue numa node */
    
    	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
    				     (numa_node_id >= 0) ? numa_node_id :
    				     NUMA_NO_NODE);
    
    
    	if (!dev_maps)
    		goto out_no_maps;
    
    	/* removes queue from unused CPUs */
    	for_each_possible_cpu(cpu) {
    		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
    			continue;
    
    		if (remove_xps_queue(dev_maps, cpu, index))
    			active = true;
    	}
    
    	/* free map if not active */
    	if (!active) {
    		RCU_INIT_POINTER(dev->xps_maps, NULL);
    		kfree_rcu(dev_maps, rcu);
    	}
    
    out_no_maps:
    
    	mutex_unlock(&xps_map_mutex);
    
    	return 0;
    error:
    
    	/* remove any maps that we added */
    	for_each_possible_cpu(cpu) {
    		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
    		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
    				 NULL;
    		if (new_map && new_map != map)
    			kfree(new_map);
    	}
    
    
    	mutex_unlock(&xps_map_mutex);
    
    	kfree(new_dev_maps);
    	return -ENOMEM;
    }
    EXPORT_SYMBOL(netif_set_xps_queue);
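
/*
 * Illustrative sketch (not part of the original dev.c): pinning transmit
 * queue 0 to CPUs 0 and 1 with netif_set_xps_queue().  The CPU choice is
 * hypothetical; drivers typically derive it from their IRQ affinity.
 */
#if 0	/* example only */
static int example_set_xps(struct net_device *dev)
{
	cpumask_var_t mask;
	int err;

	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_set_cpu(0, mask);
	cpumask_set_cpu(1, mask);

	err = netif_set_xps_queue(dev, mask, 0);	/* tx queue index 0 */

	free_cpumask_var(mask);
	return err;
}
#endif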
    
    #endif
    
    /*
 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
     */
    
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
    	if (txq < 1 || txq > dev->num_tx_queues)
    		return -EINVAL;