Newer
Older
/*
* IP multicast routing support for mrouted 3.6/3.8
*
* (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
* Linux Consultancy and Custom Driver Development
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Fixes:
* Michael Chastain : Incorrect size of copying.
* Alan Cox : Added the cache manager code
* Alan Cox : Fixed the clone/copy bug and device race.
* Mike McLagan : Routing by source
* Malcolm Beattie : Buffer handling fixes.
* Alexey Kuznetsov : Double buffer free and other fixes.
* SVR Anand : Fixed several multicast bugs and problems.
* Alexey Kuznetsov : Status, optimisations and more.
* Brad Parker : Better behaviour on mrouted upcall
* overflow.
* Carlos Picoto : PIMv1 Support
* Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only PIM header
* Relax this requirement to work with older peers.
*
*/
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>

Arnaldo Carvalho de Melo
committed
#include <net/route.h>
#include <net/sock.h>
#include <net/icmp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/ipip.h>
#include <net/checksum.h>
#include <net/netlink.h>
/* Either PIM-SM version switches on the shared CONFIG_IP_PIMSM code paths. */
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif
/* Big lock protecting the vif table, the mrt cache and the mroute socket
state. Note that changes are additionally serialized via rtnl_lock.
*/
static DEFINE_RWLOCK(mrt_lock);
/*
* Multicast router control variables
*/
/* True when slot _idx of _net's vif table currently has a device attached. */
#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
/* Special spinlock for the queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to Alan's original scheme. The hash table of resolved
entries is changed only in process context and is protected
by the weak lock mrt_lock. The queue of unresolved entries is protected
by the strong spinlock mfc_unres_lock.
As a result, the data path is entirely free of exclusive locks.
*/
/* Slab cache from which struct mfc_cache entries are allocated and freed. */
static struct kmem_cache *mrt_cachep __read_mostly;
/* Forward declarations for routines used before their definition. */
static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
#ifdef CONFIG_IP_PIMSM_V2
static struct net_protocol pim_protocol;
#endif
/* Timer armed when an unresolved entry is queued and re-armed by
ipmr_expire_process().
*/
static struct timer_list ipmr_expire_timer;
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
/*
 * Close @dev, then ask the "tunl0" device (looked up in init_net) to delete
 * the tunnel named "dvmrp<vifc_vifi>" described by @v, through that device's
 * SIOCDELTUNNEL ioctl.
 *
 * NOTE(review): the braces closing the `if (dev)` block and the function body
 * are not present in this view; the text appears to be truncated.
 */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
dev_close(dev);
/* From here on `dev` names the tunl0 device, not the device passed in. */
dev = __dev_get_by_name(&init_net, "tunl0");
if (dev) {
const struct net_device_ops *ops = dev->netdev_ops;
struct ifreq ifr;
struct ip_tunnel_parm p;
/* Describe the tunnel: IPIP between the vif's remote and local address. */
memset(&p, 0, sizeof(p));
p.iph.daddr = v->vifc_rmt_addr.s_addr;
p.iph.saddr = v->vifc_lcl_addr.s_addr;
p.iph.version = 4;
p.iph.ihl = 5;
p.iph.protocol = IPPROTO_IPIP;
sprintf(p.name, "dvmrp%d", v->vifc_vifi);
/* The parameter block is a kernel object, hence the __force cast. */
ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
if (ops->ndo_do_ioctl) {
/* Switch to KERNEL_DS around the ioctl and restore the old limit after. */
mm_segment_t oldfs = get_fs();
set_fs(KERNEL_DS);
ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
set_fs(oldfs);
}
/*
 * Create the DVMRP tunnel device "dvmrp<vifc_vifi>" for @v by issuing
 * SIOCADDTUNNEL on the "tunl0" device, then look the new device up by name,
 * apply ipv4_devconf_setall() to its in_device and clear RP_FILTER on it.
 *
 * The visible `failure:` path drops and re-takes rtnl_lock, unregisters the
 * device and returns NULL.
 *
 * NOTE(review): this text appears truncated. No NULL check on the tunl0
 * lookup is visible before `dev->netdev_ops` is read, nothing visible jumps
 * to `failure:`, and the braces do not balance (the `}` before `return dev;`
 * has no matching opener in view).
 */
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
struct net_device *dev;
dev = __dev_get_by_name(&init_net, "tunl0");
const struct net_device_ops *ops = dev->netdev_ops;
int err;
struct ifreq ifr;
struct ip_tunnel_parm p;
struct in_device *in_dev;
/* Describe the tunnel: IPIP between the vif's remote and local address. */
memset(&p, 0, sizeof(p));
p.iph.daddr = v->vifc_rmt_addr.s_addr;
p.iph.saddr = v->vifc_lcl_addr.s_addr;
p.iph.version = 4;
p.iph.ihl = 5;
p.iph.protocol = IPPROTO_IPIP;
sprintf(p.name, "dvmrp%d", v->vifc_vifi);
/* The parameter block is a kernel object, hence the __force cast. */
ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
if (ops->ndo_do_ioctl) {
/* Switch to KERNEL_DS around the ioctl and restore the old limit after. */
mm_segment_t oldfs = get_fs();
set_fs(KERNEL_DS);
err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
set_fs(oldfs);
} else
err = -EOPNOTSUPP;
/* On success `dev` is re-pointed at the freshly created tunnel device. */
if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
in_dev = __in_dev_get_rtnl(dev);
ipv4_devconf_setall(in_dev);
IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
}
}
return dev;
failure:
/* allow the register to be completed before unregistering. */
rtnl_unlock();
rtnl_lock();
unregister_netdevice(dev);
return NULL;
}
#ifdef CONFIG_IP_PIMSM
/* Index of the vif flagged VIFF_REGISTER, or -1 while no such vif exists. */
static int reg_vif_num = -1;
/*
 * Transmit hook of the PIM register device.
 *
 * The packet is counted in the device's tx statistics and bounced up to the
 * routing daemon as an IGMPMSG_WHOLEPKT report for vif reg_vif_num, both
 * under read-locked mrt_lock. The skb is then freed here regardless of the
 * report's outcome. Always returns 0.
 */
static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	read_lock(&mrt_lock);

	/* Account the frame before handing it to the report path. */
	dev->stats.tx_packets += 1;
	dev->stats.tx_bytes += skb->len;

	ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);

	read_unlock(&mrt_lock);

	kfree_skb(skb);
	return 0;
}
/* Device operations of the PIM register device: only a transmit hook is set. */
static const struct net_device_ops reg_vif_netdev_ops = {
.ndo_start_xmit = reg_vif_xmit,
};
static void reg_vif_setup(struct net_device *dev)
{
dev->type = ARPHRD_PIMREG;
dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
dev->netdev_ops = ®_vif_netdev_ops,
dev->destructor = free_netdev;
}
/*
 * Allocate and register the "pimreg" device (configured by reg_vif_setup),
 * zero its iflink, and under rcu_read_lock apply ipv4_devconf_setall() to
 * its in_device and clear RP_FILTER. Returns the device, or NULL when
 * allocation or registration fails.
 *
 * The visible `failure:` path drops and re-takes rtnl_lock, unregisters the
 * device and returns NULL.
 *
 * NOTE(review): this text appears truncated. As shown, the devconf lines sit
 * inside the `in_dev == NULL` branch, so a `goto failure;` and a closing
 * brace were presumably dropped after the first rcu_read_unlock(). The
 * function's own closing brace is also missing.
 */
static struct net_device *ipmr_reg_vif(void)
{
struct net_device *dev;
struct in_device *in_dev;
dev = alloc_netdev(0, "pimreg", reg_vif_setup);
if (dev == NULL)
return NULL;
/* A device that failed to register is still ours to free. */
if (register_netdevice(dev)) {
free_netdev(dev);
return NULL;
}
dev->iflink = 0;
rcu_read_lock();
if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
rcu_read_unlock();
ipv4_devconf_setall(in_dev);
IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
rcu_read_unlock();
return dev;
failure:
/* allow the register to be completed before unregistering. */
rtnl_unlock();
rtnl_lock();
unregister_netdevice(dev);
return NULL;
}
#endif
/*
* Delete a VIF entry
* @notify: Set to 1, if the caller is a notifier_call
*/
/*
 * Detach the device from vif slot @vifi of init_net.
 *
 * Under write-locked mrt_lock the slot's device pointer is cleared,
 * reg_vif_num is reset when this was the register vif, and maxvif is
 * adjusted when the topmost slot is removed. Afterwards the device leaves
 * allmulti mode, its MC_FORWARDING counter is decremented, tunnel/register
 * devices are unregistered unless @notify is set, and the device reference
 * is dropped. Returns 0, or -EADDRNOTAVAIL when the slot had no device.
 *
 * NOTE(review): this text appears truncated. The range check on @vifi has no
 * visible return statement (as shown it guards the `v = ...` assignment),
 * `tmp` is used without a visible declaration or loop, and the brace opened
 * by the `in_dev` test is never closed.
 */
static int vif_delete(int vifi, int notify)
{
struct vif_device *v;
struct net_device *dev;
struct in_device *in_dev;
if (vifi < 0 || vifi >= init_net.ipv4.maxvif)
v = &init_net.ipv4.vif_table[vifi];
write_lock_bh(&mrt_lock);
/* Take the device out of the slot; readers now see the vif as absent. */
dev = v->dev;
v->dev = NULL;
if (!dev) {
write_unlock_bh(&mrt_lock);
return -EADDRNOTAVAIL;
}
#ifdef CONFIG_IP_PIMSM
if (vifi == reg_vif_num)
reg_vif_num = -1;
#endif
/* Removing the highest slot: shrink maxvif to the next slot still in use. */
if (vifi+1 == init_net.ipv4.maxvif) {
if (VIF_EXISTS(&init_net, tmp))
init_net.ipv4.maxvif = tmp+1;
}
write_unlock_bh(&mrt_lock);
dev_set_allmulti(dev, -1);
if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
unregister_netdevice(dev);
dev_put(dev);
return 0;
}
/*
 * Release a multicast forwarding cache entry: drop the reference on the
 * network namespace recorded in @c, then return @c to mrt_cachep.
 */
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	struct net *owner = mfc_net(c);

	release_net(owner);
	kmem_cache_free(mrt_cachep, c);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.

Decrements init_net's cache_resolve_queue_len, then drains the entry's
unresolved queue. An skb whose IP header carries version 0 is treated as
a queued netlink request: it is rewritten into an NLMSG_ERROR reply with
error -ETIMEDOUT and unicast back to the requesting pid.

NOTE(review): this text appears truncated. `e` has no visible
declaration, there is no visible handling for skbs with a non-zero
version, and the braces closing the loop and the function are missing.
*/
static void ipmr_destroy_unres(struct mfc_cache *c)
{
struct sk_buff *skb;
atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
/* Skip the placeholder IP header to reach the netlink message. */
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
e = NLMSG_DATA(nlh);
e->error = -ETIMEDOUT;
memset(&e->msg, 0, sizeof(e->msg));
rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
}
/*
 * Timer handler shared by the whole unresolved queue.
 *
 * If mfc_unres_lock cannot be taken right away, the timer is pushed back by
 * HZ/10 and the handler returns. Otherwise every entry whose expiry time is
 * not after "now" is unlinked and destroyed, and, if entries remain, the
 * timer is re-armed for the shortest remaining interval (at most 10*HZ).
 *
 * @dummy: timer argument, unused.
 */
static void ipmr_expire_process(unsigned long dummy)
{
	struct mfc_cache **link;
	struct mfc_cache *entry;
	unsigned long now;
	unsigned long next_timeout;

	if (!spin_trylock(&mfc_unres_lock)) {
		/* Lock is busy elsewhere: retry shortly instead of spinning. */
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (mfc_unres_queue != NULL) {
		now = jiffies;
		next_timeout = 10*HZ;

		for (link = &mfc_unres_queue; (entry = *link) != NULL; ) {
			unsigned long remaining;

			if (!time_after(entry->mfc_un.unres.expires, now)) {
				/* Expired: unlink first, then tear the entry down. */
				*link = entry->next;
				ipmr_destroy_unres(entry);
				continue;
			}

			/* Still pending: track the soonest expiry and move on. */
			remaining = entry->mfc_un.unres.expires - now;
			if (remaining < next_timeout)
				next_timeout = remaining;
			link = &entry->next;
		}

		if (mfc_unres_queue != NULL)
			mod_timer(&ipmr_expire_timer, jiffies + next_timeout);
	}

	spin_unlock(&mfc_unres_lock);
}
/*
 * Fill the output-interface list of @cache from the TTL table @ttls.
 * Must be called with mrt_lock write-locked.
 *
 * All thresholds are first reset to 255 and the vif range to
 * minvif = MAXVIFS, maxvif = 0. Then, for every existing vif below
 * init_net's maxvif whose ttl is neither 0 nor 255, the threshold is copied
 * and minvif/maxvif are widened to cover that vif.
 */
static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
	int i;

	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
	cache->mfc_un.res.maxvif = 0;
	cache->mfc_un.res.minvif = MAXVIFS;

	for (i = 0; i < init_net.ipv4.maxvif; i++) {
		if (!VIF_EXISTS(&init_net, i))
			continue;
		/* 0 and 255 both leave the vif out of the forwarding set. */
		if (!ttls[i] || ttls[i] >= 255)
			continue;

		cache->mfc_un.res.ttls[i] = ttls[i];
		if (cache->mfc_un.res.minvif > i)
			cache->mfc_un.res.minvif = i;
		if (cache->mfc_un.res.maxvif <= i)
			cache->mfc_un.res.maxvif = i + 1;
	}
}
/*
 * Attach a device to vif slot vifc->vifc_vifi of init_net.
 *
 * Visible behaviour: returns -EADDRINUSE when the slot is already occupied
 * or, for VIFF_REGISTER, when a register vif already exists; obtains a
 * device via ipmr_reg_vif(), ipmr_new_tunnel() or ip_dev_find(); puts it in
 * allmulti mode; bumps MC_FORWARDING; copies the vifctl fields into the
 * vif_device; and, under write-locked mrt_lock, records reg_vif_num and
 * raises maxvif when needed.
 *
 * NOTE(review): this text is heavily truncated. `dev`, `err` and `in_dev`
 * have no visible declarations, only one `case` label of the switch remains,
 * the error branches have lost their returns and closing braces, the first
 * CONFIG_IP_PIMSM conditional has no visible matching #endif, and the final
 * unlock, return and function brace are missing. @mrtsock is not used in the
 * visible lines.
 */
static int vif_add(struct vifctl *vifc, int mrtsock)
{
int vifi = vifc->vifc_vifi;
struct vif_device *v = &init_net.ipv4.vif_table[vifi];
if (VIF_EXISTS(&init_net, vifi))
return -EADDRINUSE;
switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
case VIFF_REGISTER:
/*
* Special Purpose VIF in PIM
* All the packets will be sent to the daemon
*/
if (reg_vif_num >= 0)
return -EADDRINUSE;
dev = ipmr_reg_vif();
if (!dev)
return -ENOBUFS;
err = dev_set_allmulti(dev, 1);
if (err) {
unregister_netdevice(dev);
dev = ipmr_new_tunnel(vifc);
if (!dev)
return -ENOBUFS;
err = dev_set_allmulti(dev, 1);
if (err) {
ipmr_del_tunnel(dev, vifc);
dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
ip_rt_multicast_event(in_dev);
/*
* Fill in the VIF structures
*/
v->rate_limit = vifc->vifc_rate_limit;
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
v->flags = vifc->vifc_flags;
v->bytes_in = 0;
v->bytes_out = 0;
v->pkt_in = 0;
v->pkt_out = 0;
v->link = dev->ifindex;
/* Tunnel and register vifs record the device's iflink instead. */
if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
v->link = dev->iflink;
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
#ifdef CONFIG_IP_PIMSM
if (v->flags&VIFF_REGISTER)
reg_vif_num = vifi;
#endif
if (vifi+1 > init_net.ipv4.maxvif)
init_net.ipv4.maxvif = vifi+1;
/*
 * Walk one chain of init_net's mfc_cache_array and return the entry whose
 * origin and multicast group both match, or NULL when the chain ends
 * without a match.
 *
 * NOTE(review): this text appears truncated. The opening brace and the
 * declarations and initialisation of `line` and `c` are not visible.
 */
static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) {
if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
break;
}
return c;
}
/*
* Allocate a multicast cache entry
*
* The entry comes zeroed from mrt_cachep with GFP_KERNEL.
*
* NOTE(review): this text appears truncated. The opening brace, the
* statement guarded by the NULL check, any use of @net, the return and
* the closing brace are not visible.
*/
static struct mfc_cache *ipmr_cache_alloc(struct net *net)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (c == NULL)
/*
 * Allocate a zeroed cache entry for the unresolved queue with GFP_ATOMIC,
 * initialise its queue of pending skbs and set it to expire 10*HZ jiffies
 * from now. Returns NULL when the allocation fails.
 *
 * NOTE(review): the opening brace is missing in this view, and @net is not
 * used in the visible lines; the text appears to be truncated.
 */
static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (c == NULL)
return NULL;
skb_queue_head_init(&c->mfc_un.unres.unresolved);
c->mfc_un.unres.expires = jiffies + 10*HZ;
return c;
}
/*
* A cache entry has gone into a resolved state from queued
*
* Drains the skbs queued on the unresolved entry @uc against the resolved
* entry @c. An skb whose IP header carries version 0 is treated as a
* queued netlink request: it is answered from ipmr_fill_mroute(), or
* turned into an NLMSG_ERROR reply with -EMSGSIZE when that fails, and
* unicast to the requesting pid. Any other skb goes to ip_mr_forward().
*
* NOTE(review): this text appears truncated. `e` has no visible
* declaration and the inner if/else chain has lost a closing brace, so
* the second `else` does not pair up as written.
*/
static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
struct sk_buff *skb;
/*
* Play the pending entries through our router
*/
while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
if (ip_hdr(skb)->version == 0) {
/* Skip the placeholder IP header to reach the netlink message. */
struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
nlh->nlmsg_len = (skb_tail_pointer(skb) -
(u8 *)nlh);
} else {
nlh->nlmsg_type = NLMSG_ERROR;
nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
skb_trim(skb, nlh->nlmsg_len);
e = NLMSG_DATA(nlh);
e->error = -EMSGSIZE;
memset(&e->msg, 0, sizeof(e->msg));
rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
} else
ip_mr_forward(skb, c, 0);
}
}
/*
* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
* expects the following bizarre scheme.
*
* Called under mrt_lock.
*
* Builds an skb carrying an igmpmsg of type @assert for vif @vifi and
* queues it on init_net's mroute socket. Visible return values: -ENOBUFS,
* -EINVAL when no mroute socket is registered, otherwise the result of
* sock_queue_rcv_skb() (the skb is freed when that is negative).
*
* NOTE(review): this text appears truncated. The `return -ENOBUFS;` has
* lost its allocation-failure test, `ihl` has no visible declaration, the
* WHOLEPKT branch is never closed and its matching #endif is missing.
* The lines reading "Arnaldo Carvalho de Melo" / "committed" are not C;
* they look like residue from a web blame view.
*/
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
struct sk_buff *skb;
struct igmphdr *igmp;
struct igmpmsg *msg;
int ret;
#ifdef CONFIG_IP_PIMSM
if (assert == IGMPMSG_WHOLEPKT)
skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
else
#endif
skb = alloc_skb(128, GFP_ATOMIC);
return -ENOBUFS;
#ifdef CONFIG_IP_PIMSM
if (assert == IGMPMSG_WHOLEPKT) {
/* Ugly, but we have no choice with this interface.
Duplicate old header, fix ihl, length etc.
And all this only to mangle msg->im_msgtype and
to set msg->im_mbz to "mbz" :-)
*/

Arnaldo Carvalho de Melo
committed
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
msg = (struct igmpmsg *)skb_network_header(skb);
memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
msg->im_msgtype = IGMPMSG_WHOLEPKT;
msg->im_mbz = 0;
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
sizeof(struct iphdr));
skb->network_header = skb->tail;
skb_put(skb, ihl);
skb_copy_to_linear_data(skb, pkt->data, ihl);
ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
skb->dst = dst_clone(pkt->dst);
/*
* Add our header
*/
igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
igmp->type =
msg->im_msgtype = assert;
igmp->code = 0;
ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */

Arnaldo Carvalho de Melo
committed
skb->transport_header = skb->network_header;
/* Without a registered mroute socket there is nobody to report to. */
if (init_net.ipv4.mroute_sk == NULL) {
kfree_skb(skb);
return -EINVAL;
}
/*
* Deliver to mrouted
*/
ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb);
if (ret < 0) {
if (net_ratelimit())
printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
kfree_skb(skb);
}
return ret;
}
/*
* Queue a packet for resolution. It gets locked cache entry!
*
* Under mfc_unres_lock, looks for an unresolved entry in init_net that
* matches the packet's source and destination. When none exists, a new
* one is created (refused with -ENOBUFS once cache_resolve_queue_len
* reaches 10 or when allocation fails), reported to the daemon as
* IGMPMSG_NOCACHE, linked at the head of mfc_unres_queue and the expiry
* timer is armed. The skb is then appended to the entry's queue unless
* more than 3 skbs are already queued, in which case it is freed and
* -ENOBUFS returned. Returns 0 when the skb was queued.
*
* NOTE(review): this text appears truncated. As shown, the failed-report
* branch contains the enqueue logic, so the lines that free the entry and
* the skb and return err were presumably dropped, together with one
* closing brace.
*/
static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
int err;
struct mfc_cache *c;
const struct iphdr *iph = ip_hdr(skb);
spin_lock_bh(&mfc_unres_lock);
for (c=mfc_unres_queue; c; c=c->next) {
if (net_eq(mfc_net(c), &init_net) &&
c->mfc_mcastgrp == iph->daddr &&
c->mfc_origin == iph->saddr)
break;
}
if (c == NULL) {
/*
* Create a new entry if allowable
*/
if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 ||
(c = ipmr_cache_alloc_unres(&init_net)) == NULL) {
spin_unlock_bh(&mfc_unres_lock);
kfree_skb(skb);
return -ENOBUFS;
}
/*
* Fill in the new cache entry
*/
c->mfc_parent = -1;
c->mfc_origin = iph->saddr;
c->mfc_mcastgrp = iph->daddr;
/*
* Reflect first query at mrouted.
*/
if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
/* If the report failed throw the cache entry
out - Brad Parker
*/
spin_unlock_bh(&mfc_unres_lock);
atomic_inc(&init_net.ipv4.cache_resolve_queue_len);
c->next = mfc_unres_queue;
mfc_unres_queue = c;
mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
}
/*
* See if we can append the packet
*/
if (c->mfc_un.unres.unresolved.qlen>3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
err = 0;
}
spin_unlock_bh(&mfc_unres_lock);
return err;
}
/*
* MFC cache manipulation by user space mroute daemon
*/
/*
 * Remove the resolved cache entry of init_net that matches the
 * (origin, group) pair in @mfc.
 *
 * The entry is unlinked from its hash chain under write-locked mrt_lock and
 * then released through ipmr_cache_free(). Previously the entry was only
 * unlinked, which leaked both the cache object and the namespace reference
 * that ipmr_cache_free() drops.
 *
 * Returns 0 when an entry was removed, -ENOENT when nothing matched.
 */
static int ipmr_mfc_delete(struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, **cp;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp = &init_net.ipv4.mfc_cache_array[line];
	     (c = *cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			/* Unlink under the lock; free once it is unreachable. */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
/*
 * Add or update the resolved cache entry for the (origin, group) pair in
 * @mfc.
 *
 * An existing entry has its parent vif and TTL thresholds refreshed under
 * write-locked mrt_lock. Otherwise a new entry is allocated, filled in and
 * pushed onto the head of its hash chain. Entries added with @mrtsock == 0
 * are flagged MFC_STATIC. Finally the unresolved queue is searched for a
 * matching entry; if one is found it is unlinked (stopping the expiry timer
 * when the queue becomes empty) and its pending skbs are replayed through
 * ipmr_cache_resolve(). Returns 0 on the visible paths.
 *
 * NOTE(review): this text appears truncated. The multicast-address test has
 * lost its return statement (as shown it guards the allocation), no NULL
 * check on the allocation is visible, and the braces around the
 * unresolved-queue loop and the end of the function do not balance.
 */
static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
int line;
struct mfc_cache *uc, *c, **cp;
line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
for (cp = &init_net.ipv4.mfc_cache_array[line];
(c = *cp) != NULL; cp = &c->next) {
if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
break;
}
/* Entry already present: update it in place. */
if (c != NULL) {
write_lock_bh(&mrt_lock);
c->mfc_parent = mfc->mfcc_parent;
ipmr_update_thresholds(c, mfc->mfcc_ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
return 0;
}
if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
c = ipmr_cache_alloc(&init_net);
c->mfc_origin = mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
c->mfc_parent = mfc->mfcc_parent;
ipmr_update_thresholds(c, mfc->mfcc_ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
/* Publish the fully initialised entry at the head of its hash chain. */
write_lock_bh(&mrt_lock);
c->next = init_net.ipv4.mfc_cache_array[line];
init_net.ipv4.mfc_cache_array[line] = c;
write_unlock_bh(&mrt_lock);
/*
* Check to see if we resolved a queued list. If so we
* need to send on the frames and tidy up.
*/
spin_lock_bh(&mfc_unres_lock);
for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
cp = &uc->next) {
if (net_eq(mfc_net(uc), &init_net) &&
uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
*cp = uc->next;
atomic_dec(&init_net.ipv4.cache_resolve_queue_len);
if (mfc_unres_queue == NULL)
del_timer(&ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
if (uc) {
ipmr_cache_resolve(uc, c);
}
return 0;
}
/*
* Close the multicast socket, and clear the vif tables etc
*
* Visible behaviour: walks init_net's vif slots looking at the VIFF_STATIC
* flag, unlinks cache entries not flagged MFC_STATIC from their chain
* under write-locked mrt_lock, and, when cache_resolve_queue_len is
* non-zero, destroys every unresolved entry that belongs to init_net.
*
* NOTE(review): this text appears truncated. The outer `c` and `cp` have
* no visible declaration, the statement guarded by the VIFF_STATIC test
* and the loop over the cache array are missing (as shown the vif loop
* index is used to index mfc_cache_array), several closing braces are
* absent, and the spin_unlock_bh() has no visible matching lock. @sk is
* not used in the visible lines.
*/
static void mroute_clean_tables(struct sock *sk)
{
int i;
for (i = 0; i < init_net.ipv4.maxvif; i++) {
if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC))
cp = &init_net.ipv4.mfc_cache_array[i];
while ((c = *cp) != NULL) {
/* Static entries survive the cleanup: step over them. */
if (c->mfc_flags&MFC_STATIC) {
cp = &c->next;
continue;
}
write_lock_bh(&mrt_lock);
*cp = c->next;
write_unlock_bh(&mrt_lock);
if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) {
struct mfc_cache *c, **cp;
cp = &mfc_unres_queue;
while ((c = *cp) != NULL) {
/* Leave entries owned by other namespaces on the queue. */
if (!net_eq(mfc_net(c), &init_net)) {
cp = &c->next;
continue;
}
*cp = c->next;
ipmr_destroy_unres(c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
/*
 * Destructor hook for the multicast routing socket; it is the callback
 * handed to ip_ra_control() when the socket is set up with MRT_INIT.
 *
 * When @sk is init_net's registered mroute socket, the all-devices
 * MC_FORWARDING counter is decremented, the socket pointer is cleared and
 * the routing tables are cleaned. Everything runs under rtnl_lock.
 *
 * The socket pointer is cleared under write-locked mrt_lock. The previous
 * text released mrt_lock without ever having taken it.
 */
static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == init_net.ipv4.mroute_sk) {
		IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--;

		/* Holders of mrt_lock must never see a half-cleared socket. */
		write_lock_bh(&mrt_lock);
		init_net.ipv4.mroute_sk = NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
/*
* Socket options and virtual interface manipulation. The whole
* virtual interface system is a complete heap, but unfortunately
* that's how BSD mrouted happens to think. Maybe one day with a proper
* MOSPF/PIM router set up we can clean this up.
*/
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
{
int ret;
struct vifctl vif;
struct mfcctl mfc;
if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
switch (optname) {
case MRT_INIT:
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->num != IPPROTO_IGMP)
return -EOPNOTSUPP;
if (init_net.ipv4.mroute_sk) {
return -EADDRINUSE;
}
ret = ip_ra_control(sk, 1, mrtsock_destruct);
if (ret == 0) {
write_lock_bh(&mrt_lock);
init_net.ipv4.mroute_sk = sk;
IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++;
}
rtnl_unlock();
return ret;
case MRT_DONE:
if (sk != init_net.ipv4.mroute_sk)
return -EACCES;
return ip_ra_control(sk, 0, NULL);
case MRT_ADD_VIF:
case MRT_DEL_VIF:
if (copy_from_user(&vif, optval, sizeof(vif)))
return -EFAULT;
if (vif.vifc_vifi >= MAXVIFS)
return -ENFILE;
rtnl_lock();
ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk);
/*
* Manipulate the forwarding caches. These live
* in a sort of kernel/user symbiosis.
*/
if (copy_from_user(&mfc, optval, sizeof(mfc)))
ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk);