Merge branch 'ipmr-remove-rwlocks'
Eric Dumazet says: ==================== ipmr: get rid of rwlocks We need to get rid of rwlocks in networking stacks, if read_lock() is (ab)used from softirq context. As discussed recently [1], rwlocks are unfair by design in this case, and writers can starve and trigger soft lockups. This series converts ipmr code (both IPv4 and IPv6 families) to RCU and spinlocks. [1] https://lkml.org/lkml/2022/6/17/272 v2: fixed two typos, and resent because patch 19/19 did not make it to patchwork. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
23f94f1bc1
@ -26,7 +26,7 @@
|
||||
* @remote: Remote address for tunnels
|
||||
*/
|
||||
struct vif_device {
|
||||
struct net_device *dev;
|
||||
struct net_device __rcu *dev;
|
||||
netdevice_tracker dev_tracker;
|
||||
unsigned long bytes_in, bytes_out;
|
||||
unsigned long pkt_in, pkt_out;
|
||||
@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
|
||||
unsigned short family,
|
||||
enum fib_event_type event_type,
|
||||
struct vif_device *vif,
|
||||
struct net_device *vif_dev,
|
||||
unsigned short vif_index, u32 tb_id,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
|
||||
.family = family,
|
||||
.extack = extack,
|
||||
},
|
||||
.dev = vif->dev,
|
||||
.dev = vif_dev,
|
||||
.vif_index = vif_index,
|
||||
.vif_flags = vif->flags,
|
||||
.tb_id = tb_id,
|
||||
@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
|
||||
unsigned short family,
|
||||
enum fib_event_type event_type,
|
||||
struct vif_device *vif,
|
||||
struct net_device *vif_dev,
|
||||
unsigned short vif_index, u32 tb_id,
|
||||
unsigned int *ipmr_seq)
|
||||
{
|
||||
@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
|
||||
.info = {
|
||||
.family = family,
|
||||
},
|
||||
.dev = vif->dev,
|
||||
.dev = vif_dev,
|
||||
.vif_index = vif_index,
|
||||
.vif_flags = vif->flags,
|
||||
.tb_id = tb_id,
|
||||
@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net,
|
||||
#define MAXVIFS 32
|
||||
#endif
|
||||
|
||||
#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
|
||||
/* Note: This helper is deprecated. */
|
||||
#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev))
|
||||
|
||||
/* mfc_flags:
|
||||
* MFC_STATIC - the entry was added statically (not by a routing daemon)
|
||||
@ -305,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
|
||||
struct netlink_ext_ack *extack),
|
||||
struct mr_table *(*mr_iter)(struct net *net,
|
||||
struct mr_table *mrt),
|
||||
rwlock_t *mrt_lock, struct netlink_ext_ack *extack);
|
||||
struct netlink_ext_ack *extack);
|
||||
#else
|
||||
static inline void vif_device_init(struct vif_device *v,
|
||||
struct net_device *dev,
|
||||
@ -360,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb,
|
||||
struct netlink_ext_ack *extack),
|
||||
struct mr_table *(*mr_iter)(struct net *net,
|
||||
struct mr_table *mrt),
|
||||
rwlock_t *mrt_lock, struct netlink_ext_ack *extack)
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
return -EINVAL;
|
||||
}
|
||||
|
217
net/ipv4/ipmr.c
217
net/ipv4/ipmr.c
@ -77,7 +77,12 @@ struct ipmr_result {
|
||||
* Note that the changes are semaphored via rtnl_lock.
|
||||
*/
|
||||
|
||||
static DEFINE_RWLOCK(mrt_lock);
|
||||
static DEFINE_SPINLOCK(mrt_lock);
|
||||
|
||||
static struct net_device *vif_dev_read(const struct vif_device *vif)
|
||||
{
|
||||
return rcu_dereference(vif->dev);
|
||||
}
|
||||
|
||||
/* Multicast router control variables */
|
||||
|
||||
@ -100,11 +105,11 @@ static void ipmr_free_table(struct mr_table *mrt);
|
||||
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
struct net_device *dev, struct sk_buff *skb,
|
||||
struct mfc_cache *cache, int local);
|
||||
static int ipmr_cache_report(struct mr_table *mrt,
|
||||
static int ipmr_cache_report(const struct mr_table *mrt,
|
||||
struct sk_buff *pkt, vifi_t vifi, int assert);
|
||||
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
|
||||
int cmd);
|
||||
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
|
||||
static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
|
||||
static void mroute_clean_tables(struct mr_table *mrt, int flags);
|
||||
static void ipmr_expire_process(struct timer_list *t);
|
||||
|
||||
@ -501,11 +506,15 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
return err;
|
||||
}
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
dev->stats.tx_bytes += skb->len;
|
||||
dev->stats.tx_packets++;
|
||||
ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
|
||||
/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
|
||||
ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
|
||||
IGMPMSG_WHOLEPKT);
|
||||
|
||||
rcu_read_unlock();
|
||||
kfree_skb(skb);
|
||||
return NETDEV_TX_OK;
|
||||
}
|
||||
@ -572,6 +581,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
|
||||
{
|
||||
struct net_device *reg_dev = NULL;
|
||||
struct iphdr *encap;
|
||||
int vif_num;
|
||||
|
||||
encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
|
||||
/* Check that:
|
||||
@ -584,11 +594,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
|
||||
ntohs(encap->tot_len) + pimlen > skb->len)
|
||||
return 1;
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
if (mrt->mroute_reg_vif_num >= 0)
|
||||
reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
|
||||
read_unlock(&mrt_lock);
|
||||
|
||||
/* Pairs with WRITE_ONCE() in vif_add()/vif_delete() */
|
||||
vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
|
||||
if (vif_num >= 0)
|
||||
reg_dev = vif_dev_read(&mrt->vif_table[vif_num]);
|
||||
if (!reg_dev)
|
||||
return 1;
|
||||
|
||||
@ -614,10 +623,11 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
|
||||
static int call_ipmr_vif_entry_notifiers(struct net *net,
|
||||
enum fib_event_type event_type,
|
||||
struct vif_device *vif,
|
||||
struct net_device *vif_dev,
|
||||
vifi_t vif_index, u32 tb_id)
|
||||
{
|
||||
return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type,
|
||||
vif, vif_index, tb_id,
|
||||
vif, vif_dev, vif_index, tb_id,
|
||||
&net->ipv4.ipmr_seq);
|
||||
}
|
||||
|
||||
@ -649,22 +659,19 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
|
||||
|
||||
v = &mrt->vif_table[vifi];
|
||||
|
||||
if (VIF_EXISTS(mrt, vifi))
|
||||
call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
|
||||
mrt->id);
|
||||
|
||||
write_lock_bh(&mrt_lock);
|
||||
dev = v->dev;
|
||||
v->dev = NULL;
|
||||
|
||||
if (!dev) {
|
||||
write_unlock_bh(&mrt_lock);
|
||||
dev = rtnl_dereference(v->dev);
|
||||
if (!dev)
|
||||
return -EADDRNOTAVAIL;
|
||||
|
||||
spin_lock(&mrt_lock);
|
||||
call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev,
|
||||
vifi, mrt->id);
|
||||
RCU_INIT_POINTER(v->dev, NULL);
|
||||
|
||||
if (vifi == mrt->mroute_reg_vif_num) {
|
||||
/* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
|
||||
WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
|
||||
}
|
||||
|
||||
if (vifi == mrt->mroute_reg_vif_num)
|
||||
mrt->mroute_reg_vif_num = -1;
|
||||
|
||||
if (vifi + 1 == mrt->maxvif) {
|
||||
int tmp;
|
||||
|
||||
@ -672,10 +679,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
|
||||
if (VIF_EXISTS(mrt, tmp))
|
||||
break;
|
||||
}
|
||||
mrt->maxvif = tmp+1;
|
||||
WRITE_ONCE(mrt->maxvif, tmp + 1);
|
||||
}
|
||||
|
||||
write_unlock_bh(&mrt_lock);
|
||||
spin_unlock(&mrt_lock);
|
||||
|
||||
dev_set_allmulti(dev, -1);
|
||||
|
||||
@ -777,7 +784,7 @@ out:
|
||||
spin_unlock(&mfc_unres_lock);
|
||||
}
|
||||
|
||||
/* Fill oifs list. It is called under write locked mrt_lock. */
|
||||
/* Fill oifs list. It is called under locked mrt_lock. */
|
||||
static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache,
|
||||
unsigned char *ttls)
|
||||
{
|
||||
@ -889,15 +896,18 @@ static int vif_add(struct net *net, struct mr_table *mrt,
|
||||
v->remote = vifc->vifc_rmt_addr.s_addr;
|
||||
|
||||
/* And finish update writing critical data */
|
||||
write_lock_bh(&mrt_lock);
|
||||
v->dev = dev;
|
||||
spin_lock(&mrt_lock);
|
||||
rcu_assign_pointer(v->dev, dev);
|
||||
netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
|
||||
if (v->flags & VIFF_REGISTER)
|
||||
mrt->mroute_reg_vif_num = vifi;
|
||||
if (v->flags & VIFF_REGISTER) {
|
||||
/* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
|
||||
WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
|
||||
}
|
||||
if (vifi+1 > mrt->maxvif)
|
||||
mrt->maxvif = vifi+1;
|
||||
write_unlock_bh(&mrt_lock);
|
||||
call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
|
||||
WRITE_ONCE(mrt->maxvif, vifi + 1);
|
||||
spin_unlock(&mrt_lock);
|
||||
call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev,
|
||||
vifi, mrt->id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1001,9 +1011,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
|
||||
|
||||
/* Bounce a cache query up to mrouted and netlink.
|
||||
*
|
||||
* Called under mrt_lock.
|
||||
* Called under rcu_read_lock().
|
||||
*/
|
||||
static int ipmr_cache_report(struct mr_table *mrt,
|
||||
static int ipmr_cache_report(const struct mr_table *mrt,
|
||||
struct sk_buff *pkt, vifi_t vifi, int assert)
|
||||
{
|
||||
const int ihl = ip_hdrlen(pkt);
|
||||
@ -1038,8 +1048,11 @@ static int ipmr_cache_report(struct mr_table *mrt,
|
||||
msg->im_vif = vifi;
|
||||
msg->im_vif_hi = vifi >> 8;
|
||||
} else {
|
||||
msg->im_vif = mrt->mroute_reg_vif_num;
|
||||
msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8;
|
||||
/* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */
|
||||
int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
|
||||
|
||||
msg->im_vif = vif_num;
|
||||
msg->im_vif_hi = vif_num >> 8;
|
||||
}
|
||||
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
|
||||
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
|
||||
@ -1064,10 +1077,8 @@ static int ipmr_cache_report(struct mr_table *mrt,
|
||||
skb->transport_header = skb->network_header;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
mroute_sk = rcu_dereference(mrt->mroute_sk);
|
||||
if (!mroute_sk) {
|
||||
rcu_read_unlock();
|
||||
kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1076,7 +1087,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
|
||||
|
||||
/* Deliver to mrouted */
|
||||
ret = sock_queue_rcv_skb(mroute_sk, skb);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (ret < 0) {
|
||||
net_warn_ratelimited("mroute: pending queue full, dropping entries\n");
|
||||
kfree_skb(skb);
|
||||
@ -1086,6 +1097,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
|
||||
}
|
||||
|
||||
/* Queue a packet for resolution. It gets locked cache entry! */
|
||||
/* Called under rcu_read_lock() */
|
||||
static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
|
||||
struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
@ -1198,12 +1210,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
|
||||
mfc->mfcc_mcastgrp.s_addr, parent);
|
||||
rcu_read_unlock();
|
||||
if (c) {
|
||||
write_lock_bh(&mrt_lock);
|
||||
spin_lock(&mrt_lock);
|
||||
c->_c.mfc_parent = mfc->mfcc_parent;
|
||||
ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls);
|
||||
if (!mrtsock)
|
||||
c->_c.mfc_flags |= MFC_STATIC;
|
||||
write_unlock_bh(&mrt_lock);
|
||||
spin_unlock(&mrt_lock);
|
||||
call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
|
||||
mrt->id);
|
||||
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
|
||||
@ -1598,20 +1610,20 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
|
||||
if (vr.vifi >= mrt->maxvif)
|
||||
return -EINVAL;
|
||||
vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
vif = &mrt->vif_table[vr.vifi];
|
||||
if (VIF_EXISTS(mrt, vr.vifi)) {
|
||||
vr.icount = vif->pkt_in;
|
||||
vr.ocount = vif->pkt_out;
|
||||
vr.ibytes = vif->bytes_in;
|
||||
vr.obytes = vif->bytes_out;
|
||||
read_unlock(&mrt_lock);
|
||||
vr.icount = READ_ONCE(vif->pkt_in);
|
||||
vr.ocount = READ_ONCE(vif->pkt_out);
|
||||
vr.ibytes = READ_ONCE(vif->bytes_in);
|
||||
vr.obytes = READ_ONCE(vif->bytes_out);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (copy_to_user(arg, &vr, sizeof(vr)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -EADDRNOTAVAIL;
|
||||
case SIOCGETSGCNT:
|
||||
if (copy_from_user(&sr, arg, sizeof(sr)))
|
||||
@ -1673,20 +1685,20 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
|
||||
if (vr.vifi >= mrt->maxvif)
|
||||
return -EINVAL;
|
||||
vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
vif = &mrt->vif_table[vr.vifi];
|
||||
if (VIF_EXISTS(mrt, vr.vifi)) {
|
||||
vr.icount = vif->pkt_in;
|
||||
vr.ocount = vif->pkt_out;
|
||||
vr.ibytes = vif->bytes_in;
|
||||
vr.obytes = vif->bytes_out;
|
||||
read_unlock(&mrt_lock);
|
||||
vr.icount = READ_ONCE(vif->pkt_in);
|
||||
vr.ocount = READ_ONCE(vif->pkt_out);
|
||||
vr.ibytes = READ_ONCE(vif->bytes_in);
|
||||
vr.obytes = READ_ONCE(vif->bytes_out);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (copy_to_user(arg, &vr, sizeof(vr)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -EADDRNOTAVAIL;
|
||||
case SIOCGETSGCNT:
|
||||
if (copy_from_user(&sr, arg, sizeof(sr)))
|
||||
@ -1726,7 +1738,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
|
||||
ipmr_for_each_table(mrt, net) {
|
||||
v = &mrt->vif_table[0];
|
||||
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
|
||||
if (v->dev == dev)
|
||||
if (rcu_access_pointer(v->dev) == dev)
|
||||
vif_delete(mrt, ct, 1, NULL);
|
||||
}
|
||||
}
|
||||
@ -1804,26 +1816,28 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Processing handlers for ipmr_forward */
|
||||
/* Processing handlers for ipmr_forward, under rcu_read_lock() */
|
||||
|
||||
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
|
||||
int in_vifi, struct sk_buff *skb, int vifi)
|
||||
{
|
||||
const struct iphdr *iph = ip_hdr(skb);
|
||||
struct vif_device *vif = &mrt->vif_table[vifi];
|
||||
struct net_device *vif_dev;
|
||||
struct net_device *dev;
|
||||
struct rtable *rt;
|
||||
struct flowi4 fl4;
|
||||
int encap = 0;
|
||||
|
||||
if (!vif->dev)
|
||||
vif_dev = vif_dev_read(vif);
|
||||
if (!vif_dev)
|
||||
goto out_free;
|
||||
|
||||
if (vif->flags & VIFF_REGISTER) {
|
||||
vif->pkt_out++;
|
||||
vif->bytes_out += skb->len;
|
||||
vif->dev->stats.tx_bytes += skb->len;
|
||||
vif->dev->stats.tx_packets++;
|
||||
WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
|
||||
WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
|
||||
vif_dev->stats.tx_bytes += skb->len;
|
||||
vif_dev->stats.tx_packets++;
|
||||
ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
|
||||
goto out_free;
|
||||
}
|
||||
@ -1868,8 +1882,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
vif->pkt_out++;
|
||||
vif->bytes_out += skb->len;
|
||||
WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
|
||||
WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
|
||||
|
||||
skb_dst_drop(skb);
|
||||
skb_dst_set(skb, &rt->dst);
|
||||
@ -1881,8 +1895,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
|
||||
if (vif->flags & VIFF_TUNNEL) {
|
||||
ip_encap(net, skb, vif->local, vif->remote);
|
||||
/* FIXME: extra output firewall step used to be here. --RR */
|
||||
vif->dev->stats.tx_packets++;
|
||||
vif->dev->stats.tx_bytes += skb->len;
|
||||
vif_dev->stats.tx_packets++;
|
||||
vif_dev->stats.tx_bytes += skb->len;
|
||||
}
|
||||
|
||||
IPCB(skb)->flags |= IPSKB_FORWARDED;
|
||||
@ -1906,18 +1920,20 @@ out_free:
|
||||
kfree_skb(skb);
|
||||
}
|
||||
|
||||
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
|
||||
/* Called with mrt_lock or rcu_read_lock() */
|
||||
static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev)
|
||||
{
|
||||
int ct;
|
||||
|
||||
for (ct = mrt->maxvif-1; ct >= 0; ct--) {
|
||||
if (mrt->vif_table[ct].dev == dev)
|
||||
/* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */
|
||||
for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
|
||||
if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
|
||||
break;
|
||||
}
|
||||
return ct;
|
||||
}
|
||||
|
||||
/* "local" means that we should preserve one skb (for local delivery) */
|
||||
/* Called under rcu_read_lock() */
|
||||
static void ip_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
struct net_device *dev, struct sk_buff *skb,
|
||||
struct mfc_cache *c, int local)
|
||||
@ -1944,7 +1960,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
}
|
||||
|
||||
/* Wrong interface: drop packet and (maybe) send PIM assert. */
|
||||
if (mrt->vif_table[vif].dev != dev) {
|
||||
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
|
||||
if (rt_is_output_route(skb_rtable(skb))) {
|
||||
/* It is our own packet, looped back.
|
||||
* Very complicated situation...
|
||||
@ -1983,8 +1999,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
}
|
||||
|
||||
forward:
|
||||
mrt->vif_table[vif].pkt_in++;
|
||||
mrt->vif_table[vif].bytes_in += skb->len;
|
||||
WRITE_ONCE(mrt->vif_table[vif].pkt_in,
|
||||
mrt->vif_table[vif].pkt_in + 1);
|
||||
WRITE_ONCE(mrt->vif_table[vif].bytes_in,
|
||||
mrt->vif_table[vif].bytes_in + skb->len);
|
||||
|
||||
/* Forward the frame */
|
||||
if (c->mfc_origin == htonl(INADDR_ANY) &&
|
||||
@ -2140,22 +2158,14 @@ int ip_mr_input(struct sk_buff *skb)
|
||||
skb = skb2;
|
||||
}
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
vif = ipmr_find_vif(mrt, dev);
|
||||
if (vif >= 0) {
|
||||
int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev);
|
||||
read_unlock(&mrt_lock);
|
||||
|
||||
return err2;
|
||||
}
|
||||
read_unlock(&mrt_lock);
|
||||
if (vif >= 0)
|
||||
return ipmr_cache_unresolved(mrt, vif, skb, dev);
|
||||
kfree_skb(skb);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
ip_mr_forward(net, mrt, dev, skb, cache, local);
|
||||
read_unlock(&mrt_lock);
|
||||
|
||||
if (local)
|
||||
return ip_local_deliver(skb);
|
||||
@ -2252,18 +2262,15 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
|
||||
int vif = -1;
|
||||
|
||||
dev = skb->dev;
|
||||
read_lock(&mrt_lock);
|
||||
if (dev)
|
||||
vif = ipmr_find_vif(mrt, dev);
|
||||
if (vif < 0) {
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr));
|
||||
if (!skb2) {
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -ENOMEM;
|
||||
}
|
||||
@ -2277,14 +2284,11 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
|
||||
iph->daddr = daddr;
|
||||
iph->version = 0;
|
||||
err = ipmr_cache_unresolved(mrt, vif, skb2, dev);
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return err;
|
||||
}
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return err;
|
||||
}
|
||||
@ -2404,7 +2408,7 @@ static size_t igmpmsg_netlink_msgsize(size_t payloadlen)
|
||||
return len;
|
||||
}
|
||||
|
||||
static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
|
||||
static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
|
||||
{
|
||||
struct net *net = read_pnet(&mrt->net);
|
||||
struct nlmsghdr *nlh;
|
||||
@ -2744,18 +2748,21 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
|
||||
|
||||
static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb)
|
||||
{
|
||||
struct net_device *vif_dev;
|
||||
struct nlattr *vif_nest;
|
||||
struct vif_device *vif;
|
||||
|
||||
vif = &mrt->vif_table[vifid];
|
||||
vif_dev = vif_dev_read(vif);
|
||||
/* if the VIF doesn't exist just continue */
|
||||
if (!VIF_EXISTS(mrt, vifid))
|
||||
if (!vif_dev)
|
||||
return true;
|
||||
|
||||
vif = &mrt->vif_table[vifid];
|
||||
vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF);
|
||||
if (!vif_nest)
|
||||
return false;
|
||||
if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) ||
|
||||
|
||||
if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) ||
|
||||
nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) ||
|
||||
nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) ||
|
||||
nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in,
|
||||
@ -2887,7 +2894,7 @@ out:
|
||||
*/
|
||||
|
||||
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
__acquires(mrt_lock)
|
||||
__acquires(RCU)
|
||||
{
|
||||
struct mr_vif_iter *iter = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
@ -2899,14 +2906,14 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
|
||||
iter->mrt = mrt;
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
return mr_vif_seq_start(seq, pos);
|
||||
}
|
||||
|
||||
static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
|
||||
__releases(mrt_lock)
|
||||
__releases(RCU)
|
||||
{
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
|
||||
@ -2919,9 +2926,11 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
|
||||
"Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
|
||||
} else {
|
||||
const struct vif_device *vif = v;
|
||||
const char *name = vif->dev ?
|
||||
vif->dev->name : "none";
|
||||
const struct net_device *vif_dev;
|
||||
const char *name;
|
||||
|
||||
vif_dev = vif_dev_read(vif);
|
||||
name = vif_dev ? vif_dev->name : "none";
|
||||
seq_printf(seq,
|
||||
"%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
|
||||
vif - mrt->vif_table,
|
||||
@ -3017,7 +3026,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump,
|
||||
ipmr_mr_table_iter, &mrt_lock, extack);
|
||||
ipmr_mr_table_iter, extack);
|
||||
}
|
||||
|
||||
static const struct fib_notifier_ops ipmr_notifier_ops_template = {
|
||||
|
@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v,
|
||||
unsigned short flags,
|
||||
unsigned short get_iflink_mask)
|
||||
{
|
||||
v->dev = NULL;
|
||||
RCU_INIT_POINTER(v->dev, NULL);
|
||||
v->bytes_in = 0;
|
||||
v->bytes_out = 0;
|
||||
v->pkt_in = 0;
|
||||
@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next);
|
||||
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
|
||||
struct mr_mfc *c, struct rtmsg *rtm)
|
||||
{
|
||||
struct net_device *vif_dev;
|
||||
struct rta_mfc_stats mfcs;
|
||||
struct nlattr *mp_attr;
|
||||
struct rtnexthop *nhp;
|
||||
@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
if (VIF_EXISTS(mrt, c->mfc_parent) &&
|
||||
nla_put_u32(skb, RTA_IIF,
|
||||
mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
|
||||
rcu_read_lock();
|
||||
vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
|
||||
if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) {
|
||||
rcu_read_unlock();
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (c->mfc_flags & MFC_OFFLOAD)
|
||||
rtm->rtm_flags |= RTNH_F_OFFLOAD;
|
||||
@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
|
||||
if (!mp_attr)
|
||||
return -EMSGSIZE;
|
||||
|
||||
rcu_read_lock();
|
||||
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
|
||||
if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
|
||||
struct vif_device *vif;
|
||||
struct vif_device *vif = &mrt->vif_table[ct];
|
||||
|
||||
vif_dev = rcu_dereference(vif->dev);
|
||||
if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {
|
||||
|
||||
nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
|
||||
if (!nhp) {
|
||||
rcu_read_unlock();
|
||||
nla_nest_cancel(skb, mp_attr);
|
||||
return -EMSGSIZE;
|
||||
}
|
||||
|
||||
nhp->rtnh_flags = 0;
|
||||
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
|
||||
vif = &mrt->vif_table[ct];
|
||||
nhp->rtnh_ifindex = vif->dev->ifindex;
|
||||
nhp->rtnh_ifindex = vif_dev->ifindex;
|
||||
nhp->rtnh_len = sizeof(*nhp);
|
||||
}
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
nla_nest_end(skb, mp_attr);
|
||||
|
||||
@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt,
|
||||
int ct;
|
||||
|
||||
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
|
||||
if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
|
||||
const struct vif_device *vif;
|
||||
const struct net_device *vif_dev;
|
||||
const struct vif_device *vif;
|
||||
|
||||
vif = &mrt->vif_table[ct];
|
||||
if (vif->dev == dev)
|
||||
return true;
|
||||
}
|
||||
vif = &mrt->vif_table[ct];
|
||||
vif_dev = rcu_access_pointer(vif->dev);
|
||||
if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
|
||||
vif_dev == dev)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
|
||||
struct netlink_ext_ack *extack),
|
||||
struct mr_table *(*mr_iter)(struct net *net,
|
||||
struct mr_table *mrt),
|
||||
rwlock_t *mrt_lock,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
struct mr_table *mrt;
|
||||
@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
|
||||
|
||||
for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
|
||||
struct vif_device *v = &mrt->vif_table[0];
|
||||
struct net_device *vif_dev;
|
||||
struct mr_mfc *mfc;
|
||||
int vifi;
|
||||
|
||||
/* Notify on table VIF entries */
|
||||
read_lock(mrt_lock);
|
||||
rcu_read_lock();
|
||||
for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
|
||||
if (!v->dev)
|
||||
vif_dev = rcu_dereference(v->dev);
|
||||
if (!vif_dev)
|
||||
continue;
|
||||
|
||||
err = mr_call_vif_notifier(nb, family,
|
||||
FIB_EVENT_VIF_ADD,
|
||||
v, vifi, mrt->id, extack);
|
||||
FIB_EVENT_VIF_ADD, v,
|
||||
vif_dev, vifi,
|
||||
mrt->id, extack);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
read_unlock(mrt_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
202
net/ipv6/ip6mr.c
202
net/ipv6/ip6mr.c
@ -62,7 +62,12 @@ struct ip6mr_result {
|
||||
Note that the changes are semaphored via rtnl_lock.
|
||||
*/
|
||||
|
||||
static DEFINE_RWLOCK(mrt_lock);
|
||||
static DEFINE_SPINLOCK(mrt_lock);
|
||||
|
||||
static struct net_device *vif_dev_read(const struct vif_device *vif)
|
||||
{
|
||||
return rcu_dereference(vif->dev);
|
||||
}
|
||||
|
||||
/* Multicast router control variables */
|
||||
|
||||
@ -85,11 +90,11 @@ static void ip6mr_free_table(struct mr_table *mrt);
|
||||
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
struct net_device *dev, struct sk_buff *skb,
|
||||
struct mfc6_cache *cache);
|
||||
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
|
||||
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
|
||||
mifi_t mifi, int assert);
|
||||
static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
|
||||
int cmd);
|
||||
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
|
||||
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt);
|
||||
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
|
||||
struct netlink_callback *cb);
|
||||
static void mroute_clean_tables(struct mr_table *mrt, int flags);
|
||||
@ -398,7 +403,7 @@ static void ip6mr_free_table(struct mr_table *mrt)
|
||||
*/
|
||||
|
||||
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
__acquires(mrt_lock)
|
||||
__acquires(RCU)
|
||||
{
|
||||
struct mr_vif_iter *iter = seq->private;
|
||||
struct net *net = seq_file_net(seq);
|
||||
@ -410,14 +415,14 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
|
||||
iter->mrt = mrt;
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
return mr_vif_seq_start(seq, pos);
|
||||
}
|
||||
|
||||
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
|
||||
__releases(mrt_lock)
|
||||
__releases(RCU)
|
||||
{
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
|
||||
@ -430,7 +435,11 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
|
||||
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
|
||||
} else {
|
||||
const struct vif_device *vif = v;
|
||||
const char *name = vif->dev ? vif->dev->name : "none";
|
||||
const struct net_device *vif_dev;
|
||||
const char *name;
|
||||
|
||||
vif_dev = vif_dev_read(vif);
|
||||
name = vif_dev ? vif_dev->name : "none";
|
||||
|
||||
seq_printf(seq,
|
||||
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
|
||||
@ -549,13 +558,11 @@ static int pim6_rcv(struct sk_buff *skb)
|
||||
|
||||
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
|
||||
goto drop;
|
||||
reg_vif_num = mrt->mroute_reg_vif_num;
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
/* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */
|
||||
reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
|
||||
if (reg_vif_num >= 0)
|
||||
reg_dev = mrt->vif_table[reg_vif_num].dev;
|
||||
dev_hold(reg_dev);
|
||||
read_unlock(&mrt_lock);
|
||||
reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]);
|
||||
|
||||
if (!reg_dev)
|
||||
goto drop;
|
||||
@ -570,7 +577,6 @@ static int pim6_rcv(struct sk_buff *skb)
|
||||
|
||||
netif_rx(skb);
|
||||
|
||||
dev_put(reg_dev);
|
||||
return 0;
|
||||
drop:
|
||||
kfree_skb(skb);
|
||||
@ -600,11 +606,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
|
||||
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
|
||||
goto tx_err;
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
dev->stats.tx_bytes += skb->len;
|
||||
dev->stats.tx_packets++;
|
||||
ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num),
|
||||
MRT6MSG_WHOLEPKT);
|
||||
rcu_read_unlock();
|
||||
kfree_skb(skb);
|
||||
return NETDEV_TX_OK;
|
||||
|
||||
@ -670,10 +677,11 @@ failure:
|
||||
static int call_ip6mr_vif_entry_notifiers(struct net *net,
|
||||
enum fib_event_type event_type,
|
||||
struct vif_device *vif,
|
||||
struct net_device *vif_dev,
|
||||
mifi_t vif_index, u32 tb_id)
|
||||
{
|
||||
return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
|
||||
vif, vif_index, tb_id,
|
||||
vif, vif_dev, vif_index, tb_id,
|
||||
&net->ipv6.ipmr_seq);
|
||||
}
|
||||
|
||||
@ -698,23 +706,21 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
|
||||
|
||||
v = &mrt->vif_table[vifi];
|
||||
|
||||
if (VIF_EXISTS(mrt, vifi))
|
||||
call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
|
||||
FIB_EVENT_VIF_DEL, v, vifi,
|
||||
mrt->id);
|
||||
|
||||
write_lock_bh(&mrt_lock);
|
||||
dev = v->dev;
|
||||
v->dev = NULL;
|
||||
|
||||
if (!dev) {
|
||||
write_unlock_bh(&mrt_lock);
|
||||
dev = rtnl_dereference(v->dev);
|
||||
if (!dev)
|
||||
return -EADDRNOTAVAIL;
|
||||
}
|
||||
|
||||
call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
|
||||
FIB_EVENT_VIF_DEL, v, dev,
|
||||
vifi, mrt->id);
|
||||
spin_lock(&mrt_lock);
|
||||
RCU_INIT_POINTER(v->dev, NULL);
|
||||
|
||||
#ifdef CONFIG_IPV6_PIMSM_V2
|
||||
if (vifi == mrt->mroute_reg_vif_num)
|
||||
mrt->mroute_reg_vif_num = -1;
|
||||
if (vifi == mrt->mroute_reg_vif_num) {
|
||||
/* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */
|
||||
WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (vifi + 1 == mrt->maxvif) {
|
||||
@ -723,10 +729,10 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
|
||||
if (VIF_EXISTS(mrt, tmp))
|
||||
break;
|
||||
}
|
||||
mrt->maxvif = tmp + 1;
|
||||
WRITE_ONCE(mrt->maxvif, tmp + 1);
|
||||
}
|
||||
|
||||
write_unlock_bh(&mrt_lock);
|
||||
spin_unlock(&mrt_lock);
|
||||
|
||||
dev_set_allmulti(dev, -1);
|
||||
|
||||
@ -826,7 +832,7 @@ static void ipmr_expire_process(struct timer_list *t)
|
||||
spin_unlock(&mfc_unres_lock);
|
||||
}
|
||||
|
||||
/* Fill oifs list. It is called under write locked mrt_lock. */
|
||||
/* Fill oifs list. It is called under locked mrt_lock. */
|
||||
|
||||
static void ip6mr_update_thresholds(struct mr_table *mrt,
|
||||
struct mr_mfc *cache,
|
||||
@ -912,18 +918,18 @@ static int mif6_add(struct net *net, struct mr_table *mrt,
|
||||
MIFF_REGISTER);
|
||||
|
||||
/* And finish update writing critical data */
|
||||
write_lock_bh(&mrt_lock);
|
||||
v->dev = dev;
|
||||
spin_lock(&mrt_lock);
|
||||
rcu_assign_pointer(v->dev, dev);
|
||||
netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC);
|
||||
#ifdef CONFIG_IPV6_PIMSM_V2
|
||||
if (v->flags & MIFF_REGISTER)
|
||||
mrt->mroute_reg_vif_num = vifi;
|
||||
WRITE_ONCE(mrt->mroute_reg_vif_num, vifi);
|
||||
#endif
|
||||
if (vifi + 1 > mrt->maxvif)
|
||||
mrt->maxvif = vifi + 1;
|
||||
write_unlock_bh(&mrt_lock);
|
||||
WRITE_ONCE(mrt->maxvif, vifi + 1);
|
||||
spin_unlock(&mrt_lock);
|
||||
call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
|
||||
v, vifi, mrt->id);
|
||||
v, dev, vifi, mrt->id);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1028,10 +1034,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
|
||||
/*
|
||||
* Bounce a cache query up to pim6sd and netlink.
|
||||
*
|
||||
* Called under mrt_lock.
|
||||
* Called under rcu_read_lock()
|
||||
*/
|
||||
|
||||
static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
|
||||
static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
|
||||
mifi_t mifi, int assert)
|
||||
{
|
||||
struct sock *mroute6_sk;
|
||||
@ -1072,7 +1078,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
|
||||
if (assert == MRT6MSG_WRMIFWHOLE)
|
||||
msg->im6_mif = mifi;
|
||||
else
|
||||
msg->im6_mif = mrt->mroute_reg_vif_num;
|
||||
msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num);
|
||||
msg->im6_pad = 0;
|
||||
msg->im6_src = ipv6_hdr(pkt)->saddr;
|
||||
msg->im6_dst = ipv6_hdr(pkt)->daddr;
|
||||
@ -1107,10 +1113,8 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
|
||||
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
mroute6_sk = rcu_dereference(mrt->mroute_sk);
|
||||
if (!mroute6_sk) {
|
||||
rcu_read_unlock();
|
||||
kfree_skb(skb);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -1119,7 +1123,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
|
||||
|
||||
/* Deliver to user space multicast routing algorithms */
|
||||
ret = sock_queue_rcv_skb(mroute6_sk, skb);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (ret < 0) {
|
||||
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
|
||||
kfree_skb(skb);
|
||||
@ -1243,7 +1247,7 @@ static int ip6mr_device_event(struct notifier_block *this,
|
||||
ip6mr_for_each_table(mrt, net) {
|
||||
v = &mrt->vif_table[0];
|
||||
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
|
||||
if (v->dev == dev)
|
||||
if (rcu_access_pointer(v->dev) == dev)
|
||||
mif6_delete(mrt, ct, 1, NULL);
|
||||
}
|
||||
}
|
||||
@ -1262,7 +1266,7 @@ static int ip6mr_dump(struct net *net, struct notifier_block *nb,
|
||||
struct netlink_ext_ack *extack)
|
||||
{
|
||||
return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
|
||||
ip6mr_mr_table_iter, &mrt_lock, extack);
|
||||
ip6mr_mr_table_iter, extack);
|
||||
}
|
||||
|
||||
static struct notifier_block ip6_mr_notifier = {
|
||||
@ -1437,12 +1441,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
|
||||
&mfc->mf6cc_mcastgrp.sin6_addr, parent);
|
||||
rcu_read_unlock();
|
||||
if (c) {
|
||||
write_lock_bh(&mrt_lock);
|
||||
spin_lock(&mrt_lock);
|
||||
c->_c.mfc_parent = mfc->mf6cc_parent;
|
||||
ip6mr_update_thresholds(mrt, &c->_c, ttls);
|
||||
if (!mrtsock)
|
||||
c->_c.mfc_flags |= MFC_STATIC;
|
||||
write_unlock_bh(&mrt_lock);
|
||||
spin_unlock(&mrt_lock);
|
||||
call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
|
||||
c, mrt->id);
|
||||
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
|
||||
@ -1560,7 +1564,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
|
||||
struct net *net = sock_net(sk);
|
||||
|
||||
rtnl_lock();
|
||||
write_lock_bh(&mrt_lock);
|
||||
spin_lock(&mrt_lock);
|
||||
if (rtnl_dereference(mrt->mroute_sk)) {
|
||||
err = -EADDRINUSE;
|
||||
} else {
|
||||
@ -1568,7 +1572,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
|
||||
sock_set_flag(sk, SOCK_RCU_FREE);
|
||||
atomic_inc(&net->ipv6.devconf_all->mc_forwarding);
|
||||
}
|
||||
write_unlock_bh(&mrt_lock);
|
||||
spin_unlock(&mrt_lock);
|
||||
|
||||
if (!err)
|
||||
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
|
||||
@ -1598,14 +1602,14 @@ int ip6mr_sk_done(struct sock *sk)
|
||||
rtnl_lock();
|
||||
ip6mr_for_each_table(mrt, net) {
|
||||
if (sk == rtnl_dereference(mrt->mroute_sk)) {
|
||||
write_lock_bh(&mrt_lock);
|
||||
spin_lock(&mrt_lock);
|
||||
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
|
||||
/* Note that mroute_sk had SOCK_RCU_FREE set,
|
||||
* so the RCU grace period before sk freeing
|
||||
* is guaranteed by sk_destruct()
|
||||
*/
|
||||
atomic_dec(&devconf->mc_forwarding);
|
||||
write_unlock_bh(&mrt_lock);
|
||||
spin_unlock(&mrt_lock);
|
||||
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
|
||||
NETCONFA_MC_FORWARDING,
|
||||
NETCONFA_IFINDEX_ALL,
|
||||
@ -1891,20 +1895,20 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
|
||||
if (vr.mifi >= mrt->maxvif)
|
||||
return -EINVAL;
|
||||
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
vif = &mrt->vif_table[vr.mifi];
|
||||
if (VIF_EXISTS(mrt, vr.mifi)) {
|
||||
vr.icount = vif->pkt_in;
|
||||
vr.ocount = vif->pkt_out;
|
||||
vr.ibytes = vif->bytes_in;
|
||||
vr.obytes = vif->bytes_out;
|
||||
read_unlock(&mrt_lock);
|
||||
vr.icount = READ_ONCE(vif->pkt_in);
|
||||
vr.ocount = READ_ONCE(vif->pkt_out);
|
||||
vr.ibytes = READ_ONCE(vif->bytes_in);
|
||||
vr.obytes = READ_ONCE(vif->bytes_out);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (copy_to_user(arg, &vr, sizeof(vr)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -EADDRNOTAVAIL;
|
||||
case SIOCGETSGCNT_IN6:
|
||||
if (copy_from_user(&sr, arg, sizeof(sr)))
|
||||
@ -1966,20 +1970,20 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
|
||||
if (vr.mifi >= mrt->maxvif)
|
||||
return -EINVAL;
|
||||
vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
vif = &mrt->vif_table[vr.mifi];
|
||||
if (VIF_EXISTS(mrt, vr.mifi)) {
|
||||
vr.icount = vif->pkt_in;
|
||||
vr.ocount = vif->pkt_out;
|
||||
vr.ibytes = vif->bytes_in;
|
||||
vr.obytes = vif->bytes_out;
|
||||
read_unlock(&mrt_lock);
|
||||
vr.icount = READ_ONCE(vif->pkt_in);
|
||||
vr.ocount = READ_ONCE(vif->pkt_out);
|
||||
vr.ibytes = READ_ONCE(vif->bytes_in);
|
||||
vr.obytes = READ_ONCE(vif->bytes_out);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (copy_to_user(arg, &vr, sizeof(vr)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -EADDRNOTAVAIL;
|
||||
case SIOCGETSGCNT_IN6:
|
||||
if (copy_from_user(&sr, arg, sizeof(sr)))
|
||||
@ -2021,21 +2025,22 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
|
||||
static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
|
||||
struct sk_buff *skb, int vifi)
|
||||
{
|
||||
struct ipv6hdr *ipv6h;
|
||||
struct vif_device *vif = &mrt->vif_table[vifi];
|
||||
struct net_device *dev;
|
||||
struct net_device *vif_dev;
|
||||
struct ipv6hdr *ipv6h;
|
||||
struct dst_entry *dst;
|
||||
struct flowi6 fl6;
|
||||
|
||||
if (!vif->dev)
|
||||
vif_dev = vif_dev_read(vif);
|
||||
if (!vif_dev)
|
||||
goto out_free;
|
||||
|
||||
#ifdef CONFIG_IPV6_PIMSM_V2
|
||||
if (vif->flags & MIFF_REGISTER) {
|
||||
vif->pkt_out++;
|
||||
vif->bytes_out += skb->len;
|
||||
vif->dev->stats.tx_bytes += skb->len;
|
||||
vif->dev->stats.tx_packets++;
|
||||
WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
|
||||
WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
|
||||
vif_dev->stats.tx_bytes += skb->len;
|
||||
vif_dev->stats.tx_packets++;
|
||||
ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
|
||||
goto out_free;
|
||||
}
|
||||
@ -2068,14 +2073,13 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
|
||||
* not mrouter) cannot join to more than one interface - it will
|
||||
* result in receiving multiple packets.
|
||||
*/
|
||||
dev = vif->dev;
|
||||
skb->dev = dev;
|
||||
vif->pkt_out++;
|
||||
vif->bytes_out += skb->len;
|
||||
skb->dev = vif_dev;
|
||||
WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1);
|
||||
WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len);
|
||||
|
||||
/* We are about to write */
|
||||
/* XXX: extension headers? */
|
||||
if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
|
||||
if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev)))
|
||||
goto out_free;
|
||||
|
||||
ipv6h = ipv6_hdr(skb);
|
||||
@ -2084,7 +2088,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
|
||||
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
|
||||
|
||||
return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
|
||||
net, NULL, skb, skb->dev, dev,
|
||||
net, NULL, skb, skb->dev, vif_dev,
|
||||
ip6mr_forward2_finish);
|
||||
|
||||
out_free:
|
||||
@ -2092,17 +2096,20 @@ out_free:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Called with rcu_read_lock() */
|
||||
static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
|
||||
{
|
||||
int ct;
|
||||
|
||||
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
|
||||
if (mrt->vif_table[ct].dev == dev)
|
||||
/* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */
|
||||
for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) {
|
||||
if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev)
|
||||
break;
|
||||
}
|
||||
return ct;
|
||||
}
|
||||
|
||||
/* Called under rcu_read_lock() */
|
||||
static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
struct net_device *dev, struct sk_buff *skb,
|
||||
struct mfc6_cache *c)
|
||||
@ -2122,20 +2129,18 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
/* For an (*,G) entry, we only check that the incoming
|
||||
* interface is part of the static tree.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
cache_proxy = mr_mfc_find_any_parent(mrt, vif);
|
||||
if (cache_proxy &&
|
||||
cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
|
||||
rcu_read_unlock();
|
||||
goto forward;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrong interface: drop packet and (maybe) send PIM assert.
|
||||
*/
|
||||
if (mrt->vif_table[vif].dev != dev) {
|
||||
if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) {
|
||||
c->_c.mfc_un.res.wrong_if++;
|
||||
|
||||
if (true_vifi >= 0 && mrt->mroute_do_assert &&
|
||||
@ -2159,8 +2164,10 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
|
||||
}
|
||||
|
||||
forward:
|
||||
mrt->vif_table[vif].pkt_in++;
|
||||
mrt->vif_table[vif].bytes_in += skb->len;
|
||||
WRITE_ONCE(mrt->vif_table[vif].pkt_in,
|
||||
mrt->vif_table[vif].pkt_in + 1);
|
||||
WRITE_ONCE(mrt->vif_table[vif].bytes_in,
|
||||
mrt->vif_table[vif].bytes_in + skb->len);
|
||||
|
||||
/*
|
||||
* Forward the frame
|
||||
@ -2238,7 +2245,6 @@ int ip6_mr_input(struct sk_buff *skb)
|
||||
return err;
|
||||
}
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
cache = ip6mr_cache_find(mrt,
|
||||
&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
|
||||
if (!cache) {
|
||||
@ -2259,19 +2265,15 @@ int ip6_mr_input(struct sk_buff *skb)
|
||||
vif = ip6mr_find_vif(mrt, dev);
|
||||
if (vif >= 0) {
|
||||
int err = ip6mr_cache_unresolved(mrt, vif, skb, dev);
|
||||
read_unlock(&mrt_lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
read_unlock(&mrt_lock);
|
||||
kfree_skb(skb);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
ip6_mr_forward(net, mrt, dev, skb, cache);
|
||||
|
||||
read_unlock(&mrt_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2287,7 +2289,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
|
||||
if (!mrt)
|
||||
return -ENOENT;
|
||||
|
||||
read_lock(&mrt_lock);
|
||||
rcu_read_lock();
|
||||
cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
|
||||
if (!cache && skb->dev) {
|
||||
int vif = ip6mr_find_vif(mrt, skb->dev);
|
||||
@ -2305,14 +2307,14 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
|
||||
|
||||
dev = skb->dev;
|
||||
if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* really correct? */
|
||||
skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
|
||||
if (!skb2) {
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -2335,13 +2337,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
|
||||
iph->daddr = rt->rt6i_dst.addr;
|
||||
|
||||
err = ip6mr_cache_unresolved(mrt, vif, skb2, dev);
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
|
||||
read_unlock(&mrt_lock);
|
||||
rcu_read_unlock();
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -2460,7 +2462,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
|
||||
return len;
|
||||
}
|
||||
|
||||
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
|
||||
static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt)
|
||||
{
|
||||
struct net *net = read_pnet(&mrt->net);
|
||||
struct nlmsghdr *nlh;
|
||||
|
Loading…
x
Reference in New Issue
Block a user