netfilter: nft_set_rbtree: prefer sync gc to async worker
There is no need for asynchronous garbage collection, rbtree inserts can only happen from the netlink control plane. We already perform on-demand gc on insertion, in the area of the tree where the insertion takes place, but we don't do a full tree walk there for performance reasons. Do a full gc walk at the end of the transaction instead and remove the async worker. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
This commit is contained in:
parent
8079fc30f7
commit
7d259f021a
@ -19,7 +19,7 @@ struct nft_rbtree {
|
|||||||
struct rb_root root;
|
struct rb_root root;
|
||||||
rwlock_t lock;
|
rwlock_t lock;
|
||||||
seqcount_rwlock_t count;
|
seqcount_rwlock_t count;
|
||||||
struct delayed_work gc_work;
|
unsigned long last_gc;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct nft_rbtree_elem {
|
struct nft_rbtree_elem {
|
||||||
@ -48,8 +48,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
|
|||||||
|
|
||||||
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
|
static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
|
||||||
{
|
{
|
||||||
return nft_set_elem_expired(&rbe->ext) ||
|
return nft_set_elem_expired(&rbe->ext);
|
||||||
nft_set_elem_is_dead(&rbe->ext);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
|
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
|
||||||
@ -508,6 +507,15 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
|
||||||
|
{
|
||||||
|
write_lock_bh(&priv->lock);
|
||||||
|
write_seqcount_begin(&priv->count);
|
||||||
|
rb_erase(&rbe->node, &priv->root);
|
||||||
|
write_seqcount_end(&priv->count);
|
||||||
|
write_unlock_bh(&priv->lock);
|
||||||
|
}
|
||||||
|
|
||||||
static void nft_rbtree_remove(const struct net *net,
|
static void nft_rbtree_remove(const struct net *net,
|
||||||
const struct nft_set *set,
|
const struct nft_set *set,
|
||||||
const struct nft_set_elem *elem)
|
const struct nft_set_elem *elem)
|
||||||
@ -515,11 +523,7 @@ static void nft_rbtree_remove(const struct net *net,
|
|||||||
struct nft_rbtree *priv = nft_set_priv(set);
|
struct nft_rbtree *priv = nft_set_priv(set);
|
||||||
struct nft_rbtree_elem *rbe = elem->priv;
|
struct nft_rbtree_elem *rbe = elem->priv;
|
||||||
|
|
||||||
write_lock_bh(&priv->lock);
|
nft_rbtree_erase(priv, rbe);
|
||||||
write_seqcount_begin(&priv->count);
|
|
||||||
rb_erase(&rbe->node, &priv->root);
|
|
||||||
write_seqcount_end(&priv->count);
|
|
||||||
write_unlock_bh(&priv->lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nft_rbtree_activate(const struct net *net,
|
static void nft_rbtree_activate(const struct net *net,
|
||||||
@ -613,45 +617,40 @@ cont:
|
|||||||
read_unlock_bh(&priv->lock);
|
read_unlock_bh(&priv->lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nft_rbtree_gc(struct work_struct *work)
|
static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
|
||||||
|
struct nft_rbtree *priv,
|
||||||
|
struct nft_rbtree_elem *rbe)
|
||||||
{
|
{
|
||||||
|
struct nft_set_elem elem = {
|
||||||
|
.priv = rbe,
|
||||||
|
};
|
||||||
|
|
||||||
|
nft_setelem_data_deactivate(net, set, &elem);
|
||||||
|
nft_rbtree_erase(priv, rbe);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nft_rbtree_gc(struct nft_set *set)
|
||||||
|
{
|
||||||
|
struct nft_rbtree *priv = nft_set_priv(set);
|
||||||
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
|
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
|
||||||
struct nftables_pernet *nft_net;
|
struct nftables_pernet *nft_net;
|
||||||
struct nft_rbtree *priv;
|
struct rb_node *node, *next;
|
||||||
struct nft_trans_gc *gc;
|
struct nft_trans_gc *gc;
|
||||||
struct rb_node *node;
|
|
||||||
struct nft_set *set;
|
|
||||||
unsigned int gc_seq;
|
|
||||||
struct net *net;
|
struct net *net;
|
||||||
|
|
||||||
priv = container_of(work, struct nft_rbtree, gc_work.work);
|
|
||||||
set = nft_set_container_of(priv);
|
set = nft_set_container_of(priv);
|
||||||
net = read_pnet(&set->net);
|
net = read_pnet(&set->net);
|
||||||
nft_net = nft_pernet(net);
|
nft_net = nft_pernet(net);
|
||||||
gc_seq = READ_ONCE(nft_net->gc_seq);
|
|
||||||
|
|
||||||
if (nft_set_gc_is_pending(set))
|
gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
|
||||||
goto done;
|
|
||||||
|
|
||||||
gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
|
|
||||||
if (!gc)
|
if (!gc)
|
||||||
goto done;
|
return;
|
||||||
|
|
||||||
read_lock_bh(&priv->lock);
|
for (node = rb_first(&priv->root); node ; node = next) {
|
||||||
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
|
next = rb_next(node);
|
||||||
|
|
||||||
/* Ruleset has been updated, try later. */
|
|
||||||
if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
|
|
||||||
nft_trans_gc_destroy(gc);
|
|
||||||
gc = NULL;
|
|
||||||
goto try_later;
|
|
||||||
}
|
|
||||||
|
|
||||||
rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
||||||
|
|
||||||
if (nft_set_elem_is_dead(&rbe->ext))
|
|
||||||
goto dead_elem;
|
|
||||||
|
|
||||||
/* elements are reversed in the rbtree for historical reasons,
|
/* elements are reversed in the rbtree for historical reasons,
|
||||||
* from highest to lowest value, that is why end element is
|
* from highest to lowest value, that is why end element is
|
||||||
* always visited before the start element.
|
* always visited before the start element.
|
||||||
@ -663,37 +662,34 @@ static void nft_rbtree_gc(struct work_struct *work)
|
|||||||
if (!nft_set_elem_expired(&rbe->ext))
|
if (!nft_set_elem_expired(&rbe->ext))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
nft_set_elem_dead(&rbe->ext);
|
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
|
||||||
|
|
||||||
if (!rbe_end)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
nft_set_elem_dead(&rbe_end->ext);
|
|
||||||
|
|
||||||
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
|
|
||||||
if (!gc)
|
if (!gc)
|
||||||
goto try_later;
|
goto try_later;
|
||||||
|
|
||||||
nft_trans_gc_elem_add(gc, rbe_end);
|
/* end element needs to be removed first, it has
|
||||||
rbe_end = NULL;
|
* no timeout extension.
|
||||||
dead_elem:
|
*/
|
||||||
gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
|
if (rbe_end) {
|
||||||
|
nft_rbtree_gc_remove(net, set, priv, rbe_end);
|
||||||
|
nft_trans_gc_elem_add(gc, rbe_end);
|
||||||
|
rbe_end = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
|
||||||
if (!gc)
|
if (!gc)
|
||||||
goto try_later;
|
goto try_later;
|
||||||
|
|
||||||
|
nft_rbtree_gc_remove(net, set, priv, rbe);
|
||||||
nft_trans_gc_elem_add(gc, rbe);
|
nft_trans_gc_elem_add(gc, rbe);
|
||||||
}
|
}
|
||||||
|
|
||||||
gc = nft_trans_gc_catchall_async(gc, gc_seq);
|
|
||||||
|
|
||||||
try_later:
|
try_later:
|
||||||
read_unlock_bh(&priv->lock);
|
|
||||||
|
|
||||||
if (gc)
|
if (gc) {
|
||||||
nft_trans_gc_queue_async_done(gc);
|
gc = nft_trans_gc_catchall_sync(gc);
|
||||||
done:
|
nft_trans_gc_queue_sync_done(gc);
|
||||||
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
|
priv->last_gc = jiffies;
|
||||||
nft_set_gc_interval(set));
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
|
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
|
||||||
@ -712,11 +708,6 @@ static int nft_rbtree_init(const struct nft_set *set,
|
|||||||
seqcount_rwlock_init(&priv->count, &priv->lock);
|
seqcount_rwlock_init(&priv->count, &priv->lock);
|
||||||
priv->root = RB_ROOT;
|
priv->root = RB_ROOT;
|
||||||
|
|
||||||
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
|
|
||||||
if (set->flags & NFT_SET_TIMEOUT)
|
|
||||||
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
|
|
||||||
nft_set_gc_interval(set));
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -727,8 +718,6 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
|
|||||||
struct nft_rbtree_elem *rbe;
|
struct nft_rbtree_elem *rbe;
|
||||||
struct rb_node *node;
|
struct rb_node *node;
|
||||||
|
|
||||||
cancel_delayed_work_sync(&priv->gc_work);
|
|
||||||
rcu_barrier();
|
|
||||||
while ((node = priv->root.rb_node) != NULL) {
|
while ((node = priv->root.rb_node) != NULL) {
|
||||||
rb_erase(node, &priv->root);
|
rb_erase(node, &priv->root);
|
||||||
rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
rbe = rb_entry(node, struct nft_rbtree_elem, node);
|
||||||
@ -754,6 +743,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nft_rbtree_commit(struct nft_set *set)
|
||||||
|
{
|
||||||
|
struct nft_rbtree *priv = nft_set_priv(set);
|
||||||
|
|
||||||
|
if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
|
||||||
|
nft_rbtree_gc(set);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void nft_rbtree_gc_init(const struct nft_set *set)
|
||||||
|
{
|
||||||
|
struct nft_rbtree *priv = nft_set_priv(set);
|
||||||
|
|
||||||
|
priv->last_gc = jiffies;
|
||||||
|
}
|
||||||
|
|
||||||
const struct nft_set_type nft_set_rbtree_type = {
|
const struct nft_set_type nft_set_rbtree_type = {
|
||||||
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
|
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
|
||||||
.ops = {
|
.ops = {
|
||||||
@ -767,6 +771,8 @@ const struct nft_set_type nft_set_rbtree_type = {
|
|||||||
.deactivate = nft_rbtree_deactivate,
|
.deactivate = nft_rbtree_deactivate,
|
||||||
.flush = nft_rbtree_flush,
|
.flush = nft_rbtree_flush,
|
||||||
.activate = nft_rbtree_activate,
|
.activate = nft_rbtree_activate,
|
||||||
|
.commit = nft_rbtree_commit,
|
||||||
|
.gc_init = nft_rbtree_gc_init,
|
||||||
.lookup = nft_rbtree_lookup,
|
.lookup = nft_rbtree_lookup,
|
||||||
.walk = nft_rbtree_walk,
|
.walk = nft_rbtree_walk,
|
||||||
.get = nft_rbtree_get,
|
.get = nft_rbtree_get,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user