Merge branch 'inet-frag-fixes'

Florian Westphal says:

====================
inet: ip defrag bug fixes

Johan Schuijt and Frank Schreuder reported crash and softlockup after the
inet workqueue eviction change:

general protection fault: 0000 [#1] SMP
CPU: 0 PID: 4 Comm: kworker/0:0 Not tainted 3.18.18-transip-1.5 #1
Workqueue: events inet_frag_worker
task: ffff880224935130 ti: ffff880224938000 task.ti: ffff880224938000
RIP: 0010:[<ffffffff8149288c>] [<ffffffff8149288c>] inet_evict_bucket+0xfc/0x160
RSP: 0018:ffff88022493bd58  EFLAGS: 00010286
RAX: ffff88021f4f3e80 RBX: dead000000100100 RCX: 000000000000006b
RDX: 000000000000006c RSI: ffff88021f4f3e80 RDI: dead0000001000a8
RBP: 0000000000000002 R08: ffff880222273900 R09: ffff880036e49200
R10: ffff8800c6e86500 R11: ffff880036f45500 R12: ffffffff81a87100
R13: ffff88022493bd70 R14: 0000000000000000 R15: ffff8800c9b26280
[..]
Call Trace:
 [<ffffffff814929e0>] ? inet_frag_worker+0x60/0x210
 [<ffffffff8107e3a2>] ? process_one_work+0x142/0x3b0
 [<ffffffff8107eb94>] ? worker_thread+0x114/0x440
[..]

A second issue results in a softlockup, since the evictor may restart the
eviction loop a (potentially) unlimited number of times while local
softirqs are disabled.

Frank reports that the test system remained stable for 14 hours of testing
(before, the crash occurred within half an hour in their setup).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-07-26 21:00:15 -07:00
commit 64b892ad23
6 changed files with 42 additions and 47 deletions

View File

@ -21,13 +21,11 @@ struct netns_frags {
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_EVICTED: frag queue is being evicted
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_EVICTED = BIT(3)
};
/**
@ -45,6 +43,7 @@ enum {
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
* @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
*/
struct inet_frag_queue {
spinlock_t lock;
@ -59,6 +58,7 @@ struct inet_frag_queue {
__u8 flags;
u16 max_size;
struct netns_frags *net;
struct hlist_node list_evictor;
};
#define INETFRAGS_HASHSZ 1024
@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f
inet_frag_destroy(q, f);
}
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
{
return !hlist_unhashed(&q->list_evictor);
}
/* Memory Tracking Functions. */
/* The default percpu_counter batch size is not big enough to scale to
@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf)
return percpu_counter_read(&nf->mem);
}
static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i)
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{
__percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch);
__percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
}
static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{
__percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch);
__percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
}
static inline void init_frag_mem_limit(struct netns_frags *nf)

View File

@ -207,7 +207,7 @@ found:
} else {
fq->q.meat += skb->len;
}
add_frag_mem_limit(&fq->q, skb->truesize);
add_frag_mem_limit(fq->q.net, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
clone->data_len = clone->len;
head->data_len -= clone->len;
head->len -= clone->len;
add_frag_mem_limit(&fq->q, clone->truesize);
add_frag_mem_limit(fq->q.net, clone->truesize);
}
WARN_ON(head == NULL);
@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
sub_frag_mem_limit(&fq->q, sum_truesize);
sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;

View File

@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
unsigned int evicted = 0;
HLIST_HEAD(expired);
evict_again:
spin_lock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
if (!inet_fragq_should_evict(fq))
continue;
if (!del_timer(&fq->timer)) {
/* q expiring right now thus increment its refcount so
* it won't be freed under us and wait until the timer
* has finished executing then destroy it
*/
atomic_inc(&fq->refcnt);
spin_unlock(&hb->chain_lock);
del_timer_sync(&fq->timer);
inet_frag_put(fq, f);
goto evict_again;
}
if (!del_timer(&fq->timer))
continue;
fq->flags |= INET_FRAG_EVICTED;
hlist_del(&fq->list);
hlist_add_head(&fq->list, &expired);
hlist_add_head(&fq->list_evictor, &expired);
++evicted;
}
spin_unlock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &expired, list)
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
f->frag_expire((unsigned long) fq);
return evicted;
@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
int i;
nf->low_thresh = 0;
local_bh_disable();
evict_again:
local_bh_disable();
seq = read_seqbegin(&f->rnd_seqlock);
for (i = 0; i < INETFRAGS_HASHSZ ; i++)
inet_evict_bucket(f, &f->hash[i]);
if (read_seqretry(&f->rnd_seqlock, seq))
goto evict_again;
local_bh_enable();
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
percpu_counter_sum(&nf->mem))
goto evict_again;
percpu_counter_destroy(&nf->mem);
}
@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_bucket *hb;
hb = get_frag_bucket_locked(fq, f);
if (!(fq->flags & INET_FRAG_EVICTED))
hlist_del(&fq->list);
hlist_del(&fq->list);
fq->flags |= INET_FRAG_COMPLETE;
spin_unlock(&hb->chain_lock);
}
@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f);
atomic_dec(&fq->refcnt);
fq->flags |= INET_FRAG_COMPLETE;
}
}
EXPORT_SYMBOL(inet_frag_kill);
@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
fp = xp;
}
sum = sum_truesize + f->qsize;
sub_frag_mem_limit(q, sum);
if (f->destructor)
f->destructor(q);
kmem_cache_free(f->frags_cachep, q);
sub_frag_mem_limit(nf, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
q->net = nf;
f->constructor(q, arg);
add_frag_mem_limit(q, f->qsize);
add_frag_mem_limit(nf, f->qsize);
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);

View File

@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if (!(qp->q.flags & INET_FRAG_EVICTED)) {
if (!inet_frag_evicting(&qp->q)) {
struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph;
int err;
@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp)
kfree_skb(fp);
fp = xp;
} while (fp);
sub_frag_mem_limit(&qp->q, sum_truesize);
sub_frag_mem_limit(qp->q.net, sum_truesize);
qp->q.flags = 0;
qp->q.len = 0;
@ -455,7 +455,7 @@ found:
qp->q.fragments = next;
qp->q.meat -= free_it->len;
sub_frag_mem_limit(&qp->q, free_it->truesize);
sub_frag_mem_limit(qp->q.net, free_it->truesize);
kfree_skb(free_it);
}
}
@ -479,7 +479,7 @@ found:
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(&qp->q, skb->truesize);
add_frag_mem_limit(qp->q.net, skb->truesize);
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;
@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(&qp->q, clone->truesize);
add_frag_mem_limit(qp->q.net, clone->truesize);
}
skb_push(head, head->data - skb_network_header(head));
@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
fp = next;
}
sub_frag_mem_limit(&qp->q, sum_truesize);
sub_frag_mem_limit(qp->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;

View File

@ -348,7 +348,7 @@ found:
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
add_frag_mem_limit(&fq->q, skb->truesize);
add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
add_frag_mem_limit(&fq->q, clone->truesize);
add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
sub_frag_mem_limit(&fq->q, head->truesize);
sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1;
head->next = NULL;

View File

@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
if (fq->q.flags & INET_FRAG_EVICTED)
if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock;
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
@ -330,7 +330,7 @@ found:
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(&fq->q, skb->truesize);
add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(&fq->q, clone->truesize);
add_frag_mem_limit(fq->q.net, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
sub_frag_mem_limit(&fq->q, sum_truesize);
sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL;
head->dev = dev;