Commit 7d259f0

Florian Westphal authored and ummakynes committed
netfilter: nft_set_rbtree: prefer sync gc to async worker
There is no need for asynchronous garbage collection, rbtree inserts
can only happen from the netlink control plane.

We already perform on-demand gc on insertion, in the area of the tree
where the insertion takes place, but we don't do a full tree walk
there for performance reasons.

Do a full gc walk at the end of the transaction instead and remove
the async worker.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
1 parent 8079fc3 commit 7d259f0
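
The heart of the change is the rate-limited commit hook (nft_rbtree_commit() in the diff below): the full tree walk runs at most once per gc interval, no matter how many transactions commit in between. As a minimal standalone sketch of that idiom (gc_due() and its caller are hypothetical names, not part of this patch):

#include <linux/jiffies.h>

/* Run an expensive cleanup at most once per 'interval' jiffies.
 * time_after_eq() compares jiffies values correctly across counter
 * wraparound, which a plain '>=' comparison would get wrong.
 */
static bool gc_due(unsigned long *last_gc, unsigned long interval)
{
	if (!time_after_eq(jiffies, *last_gc + interval))
		return false;	/* interval has not elapsed yet, skip */

	*last_gc = jiffies;	/* the patch updates this after a successful walk */
	return true;
}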

File tree

1 file changed (+65, −59 lines)

net/netfilter/nft_set_rbtree.c

Lines changed: 65 additions & 59 deletions
@@ -19,7 +19,7 @@ struct nft_rbtree {
 	struct rb_root		root;
 	rwlock_t		lock;
 	seqcount_rwlock_t	count;
-	struct delayed_work	gc_work;
+	unsigned long		last_gc;
 };
 
 struct nft_rbtree_elem {
@@ -48,8 +48,7 @@ static int nft_rbtree_cmp(const struct nft_set *set,
 
 static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
 {
-	return nft_set_elem_expired(&rbe->ext) ||
-	       nft_set_elem_is_dead(&rbe->ext);
+	return nft_set_elem_expired(&rbe->ext);
 }
 
 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -508,18 +507,23 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 	return err;
 }
 
+static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
+{
+	write_lock_bh(&priv->lock);
+	write_seqcount_begin(&priv->count);
+	rb_erase(&rbe->node, &priv->root);
+	write_seqcount_end(&priv->count);
+	write_unlock_bh(&priv->lock);
+}
+
 static void nft_rbtree_remove(const struct net *net,
 			      const struct nft_set *set,
 			      const struct nft_set_elem *elem)
 {
 	struct nft_rbtree *priv = nft_set_priv(set);
 	struct nft_rbtree_elem *rbe = elem->priv;
 
-	write_lock_bh(&priv->lock);
-	write_seqcount_begin(&priv->count);
-	rb_erase(&rbe->node, &priv->root);
-	write_seqcount_end(&priv->count);
-	write_unlock_bh(&priv->lock);
+	nft_rbtree_erase(priv, rbe);
 }
 
 static void nft_rbtree_activate(const struct net *net,
@@ -613,45 +617,40 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 	read_unlock_bh(&priv->lock);
 }
 
-static void nft_rbtree_gc(struct work_struct *work)
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+				 struct nft_rbtree *priv,
+				 struct nft_rbtree_elem *rbe)
 {
+	struct nft_set_elem elem = {
+		.priv	= rbe,
+	};
+
+	nft_setelem_data_deactivate(net, set, &elem);
+	nft_rbtree_erase(priv, rbe);
+}
+
+static void nft_rbtree_gc(struct nft_set *set)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
 	struct nft_rbtree_elem *rbe, *rbe_end = NULL;
 	struct nftables_pernet *nft_net;
-	struct nft_rbtree *priv;
+	struct rb_node *node, *next;
 	struct nft_trans_gc *gc;
-	struct rb_node *node;
-	struct nft_set *set;
-	unsigned int gc_seq;
 	struct net *net;
 
-	priv = container_of(work, struct nft_rbtree, gc_work.work);
 	set  = nft_set_container_of(priv);
 	net  = read_pnet(&set->net);
 	nft_net = nft_pernet(net);
-	gc_seq  = READ_ONCE(nft_net->gc_seq);
 
-	if (nft_set_gc_is_pending(set))
-		goto done;
-
-	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
 	if (!gc)
-		goto done;
-
-	read_lock_bh(&priv->lock);
-	for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+		return;
 
-		/* Ruleset has been updated, try later. */
-		if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
-			nft_trans_gc_destroy(gc);
-			gc = NULL;
-			goto try_later;
-		}
+	for (node = rb_first(&priv->root); node ; node = next) {
+		next = rb_next(node);
 
 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
 
-		if (nft_set_elem_is_dead(&rbe->ext))
-			goto dead_elem;
-
 		/* elements are reversed in the rbtree for historical reasons,
 		 * from highest to lowest value, that is why end element is
 		 * always visited before the start element.
@@ -663,37 +662,34 @@ static void nft_rbtree_gc(struct work_struct *work)
 		if (!nft_set_elem_expired(&rbe->ext))
 			continue;
 
-		nft_set_elem_dead(&rbe->ext);
-
-		if (!rbe_end)
-			continue;
-
-		nft_set_elem_dead(&rbe_end->ext);
-
-		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+		gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
 		if (!gc)
 			goto try_later;
 
-		nft_trans_gc_elem_add(gc, rbe_end);
-		rbe_end = NULL;
-dead_elem:
-		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+		/* end element needs to be removed first, it has
+		 * no timeout extension.
+		 */
+		if (rbe_end) {
+			nft_rbtree_gc_remove(net, set, priv, rbe_end);
+			nft_trans_gc_elem_add(gc, rbe_end);
+			rbe_end = NULL;
+		}
+
+		gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
 		if (!gc)
 			goto try_later;
 
+		nft_rbtree_gc_remove(net, set, priv, rbe);
 		nft_trans_gc_elem_add(gc, rbe);
 	}
 
-	gc = nft_trans_gc_catchall_async(gc, gc_seq);
-
 try_later:
-	read_unlock_bh(&priv->lock);
 
-	if (gc)
-		nft_trans_gc_queue_async_done(gc);
-done:
-	queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-			   nft_set_gc_interval(set));
+	if (gc) {
+		gc = nft_trans_gc_catchall_sync(gc);
+		nft_trans_gc_queue_sync_done(gc);
+		priv->last_gc = jiffies;
+	}
 }
 
 static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
@@ -712,11 +708,6 @@ static int nft_rbtree_init(const struct nft_set *set,
 	seqcount_rwlock_init(&priv->count, &priv->lock);
 	priv->root = RB_ROOT;
 
-	INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rbtree_gc);
-	if (set->flags & NFT_SET_TIMEOUT)
-		queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
-				   nft_set_gc_interval(set));
-
 	return 0;
 }
 
@@ -727,8 +718,6 @@ static void nft_rbtree_destroy(const struct nft_ctx *ctx,
 	struct nft_rbtree_elem *rbe;
 	struct rb_node *node;
 
-	cancel_delayed_work_sync(&priv->gc_work);
-	rcu_barrier();
 	while ((node = priv->root.rb_node) != NULL) {
 		rb_erase(node, &priv->root);
 		rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -754,6 +743,21 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
 	return true;
 }
 
+static void nft_rbtree_commit(struct nft_set *set)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+
+	if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
+		nft_rbtree_gc(set);
+}
+
+static void nft_rbtree_gc_init(const struct nft_set *set)
+{
+	struct nft_rbtree *priv = nft_set_priv(set);
+
+	priv->last_gc = jiffies;
+}
+
 const struct nft_set_type nft_set_rbtree_type = {
 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
 	.ops		= {
@@ -767,6 +771,8 @@ const struct nft_set_type nft_set_rbtree_type = {
 		.deactivate	= nft_rbtree_deactivate,
 		.flush		= nft_rbtree_flush,
 		.activate	= nft_rbtree_activate,
+		.commit		= nft_rbtree_commit,
+		.gc_init	= nft_rbtree_gc_init,
 		.lookup		= nft_rbtree_lookup,
 		.walk		= nft_rbtree_walk,
 		.get		= nft_rbtree_get,
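
A note on the nft_rbtree_erase() helper factored out above: it is the classic write side of a seqcount paired with a rwlock. Writers serialize on the lock and bump the sequence count, so lockless readers that sample the count before and after a lookup detect a concurrent rebalance and retry instead of seeing a half-updated tree. A minimal sketch of the same pattern outside this file (demo_tree and demo_erase are hypothetical names):

#include <linux/rbtree.h>
#include <linux/seqlock.h>
#include <linux/spinlock.h>

struct demo_tree {
	struct rb_root		root;
	rwlock_t		lock;
	seqcount_rwlock_t	count;
};

/* Write side: take the rwlock to serialize against other writers,
 * bump the seqcount so lockless readers retry, and use the _bh
 * variants to keep softirq-context readers off this CPU meanwhile.
 */
static void demo_erase(struct demo_tree *t, struct rb_node *node)
{
	write_lock_bh(&t->lock);
	write_seqcount_begin(&t->count);
	rb_erase(node, &t->root);
	write_seqcount_end(&t->count);
	write_unlock_bh(&t->lock);
}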
