uprobes: reduce contention on uprobes_tree access
Active uprobes are stored in an RB tree and accesses to this tree are dominated by read operations. Currently these accesses are serialized by a spinlock, but this leads to enormous contention when large numbers of threads are executing active probes. This patch converts the spinlock used to serialize access to the uprobes_tree RB tree into a reader-writer spinlock. This lock type aligns naturally with the overwhelmingly read-only nature of the tree usage here. Although the addition of reader-writer spinlocks is discouraged [0], this fix is proposed as an interim solution while an RCU-based approach is implemented (that work is in a nascent form). This fix also has the benefit of being trivial and self-contained, and therefore simple to backport. We have used a uprobe benchmark from the BPF selftests [1] to estimate the improvements. Each block of results below shows one line per execution of the benchmark (the "Summary" line), and each line is a run with one more thread added - a thread is a "producer". The lines are edited to remove extraneous output. 
The tests were executed with this driver script: for num_threads in {1..20}; do sudo ./bench -a -p $num_threads trig-uprobe-nop | grep Summary; done SPINLOCK (BEFORE) ================== Summary: hits 1.396 ± 0.007M/s ( 1.396M/prod) Summary: hits 1.656 ± 0.016M/s ( 0.828M/prod) Summary: hits 2.246 ± 0.008M/s ( 0.749M/prod) Summary: hits 2.114 ± 0.010M/s ( 0.529M/prod) Summary: hits 2.013 ± 0.009M/s ( 0.403M/prod) Summary: hits 1.753 ± 0.008M/s ( 0.292M/prod) Summary: hits 1.847 ± 0.001M/s ( 0.264M/prod) Summary: hits 1.889 ± 0.001M/s ( 0.236M/prod) Summary: hits 1.833 ± 0.006M/s ( 0.204M/prod) Summary: hits 1.900 ± 0.003M/s ( 0.190M/prod) Summary: hits 1.918 ± 0.006M/s ( 0.174M/prod) Summary: hits 1.925 ± 0.002M/s ( 0.160M/prod) Summary: hits 1.837 ± 0.001M/s ( 0.141M/prod) Summary: hits 1.898 ± 0.001M/s ( 0.136M/prod) Summary: hits 1.799 ± 0.016M/s ( 0.120M/prod) Summary: hits 1.850 ± 0.005M/s ( 0.109M/prod) Summary: hits 1.816 ± 0.002M/s ( 0.101M/prod) Summary: hits 1.787 ± 0.001M/s ( 0.094M/prod) Summary: hits 1.764 ± 0.002M/s ( 0.088M/prod) RW SPINLOCK (AFTER) =================== Summary: hits 1.444 ± 0.020M/s ( 1.444M/prod) Summary: hits 2.279 ± 0.011M/s ( 1.139M/prod) Summary: hits 3.422 ± 0.014M/s ( 1.141M/prod) Summary: hits 3.565 ± 0.017M/s ( 0.891M/prod) Summary: hits 2.671 ± 0.013M/s ( 0.534M/prod) Summary: hits 2.409 ± 0.005M/s ( 0.401M/prod) Summary: hits 2.485 ± 0.008M/s ( 0.355M/prod) Summary: hits 2.496 ± 0.003M/s ( 0.312M/prod) Summary: hits 2.585 ± 0.002M/s ( 0.287M/prod) Summary: hits 2.908 ± 0.011M/s ( 0.291M/prod) Summary: hits 2.346 ± 0.016M/s ( 0.213M/prod) Summary: hits 2.804 ± 0.004M/s ( 0.234M/prod) Summary: hits 2.556 ± 0.001M/s ( 0.197M/prod) Summary: hits 2.754 ± 0.004M/s ( 0.197M/prod) Summary: hits 2.482 ± 0.002M/s ( 0.165M/prod) Summary: hits 2.412 ± 0.005M/s ( 0.151M/prod) Summary: hits 2.710 ± 0.003M/s ( 0.159M/prod) Summary: hits 2.826 ± 0.005M/s ( 0.157M/prod) Summary: hits 2.718 ± 0.001M/s ( 0.143M/prod) Summary: hits 2.844 ± 
0.006M/s ( 0.142M/prod) The numbers in parentheses give the averaged throughput per thread, which is of greatest interest here as a measure of scalability. Improvements are on the order of 22 - 68% with this particular benchmark (mean = 43%). V2: - Updated commit message to include benchmark results. [0] https://docs.kernel.org/locking/spinlocks.html [1] https://github.com/torvalds/linux/blob/master/tools/testing/selftests/bpf/benchs/bench_trigger.c Link: https://lore.kernel.org/all/20240422102306.6026-1-jonathan.haslam@gmail.com/ Signed-off-by: Jonathan Haslam <jonathan.haslam@gmail.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
This commit is contained in:
parent
5120d167e2
commit
0dc715295d
@ -39,7 +39,7 @@ static struct rb_root uprobes_tree = RB_ROOT;
|
||||
*/
|
||||
#define no_uprobe_events() RB_EMPTY_ROOT(&uprobes_tree)
|
||||
|
||||
static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
|
||||
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
|
||||
|
||||
#define UPROBES_HASH_SZ 13
|
||||
/* serialize uprobe->pending_list */
|
||||
@ -669,9 +669,9 @@ static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
|
||||
{
|
||||
struct uprobe *uprobe;
|
||||
|
||||
spin_lock(&uprobes_treelock);
|
||||
read_lock(&uprobes_treelock);
|
||||
uprobe = __find_uprobe(inode, offset);
|
||||
spin_unlock(&uprobes_treelock);
|
||||
read_unlock(&uprobes_treelock);
|
||||
|
||||
return uprobe;
|
||||
}
|
||||
@ -701,9 +701,9 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
|
||||
{
|
||||
struct uprobe *u;
|
||||
|
||||
spin_lock(&uprobes_treelock);
|
||||
write_lock(&uprobes_treelock);
|
||||
u = __insert_uprobe(uprobe);
|
||||
spin_unlock(&uprobes_treelock);
|
||||
write_unlock(&uprobes_treelock);
|
||||
|
||||
return u;
|
||||
}
|
||||
@ -935,9 +935,9 @@ static void delete_uprobe(struct uprobe *uprobe)
|
||||
if (WARN_ON(!uprobe_is_active(uprobe)))
|
||||
return;
|
||||
|
||||
spin_lock(&uprobes_treelock);
|
||||
write_lock(&uprobes_treelock);
|
||||
rb_erase(&uprobe->rb_node, &uprobes_tree);
|
||||
spin_unlock(&uprobes_treelock);
|
||||
write_unlock(&uprobes_treelock);
|
||||
RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
|
||||
put_uprobe(uprobe);
|
||||
}
|
||||
@ -1298,7 +1298,7 @@ static void build_probe_list(struct inode *inode,
|
||||
min = vaddr_to_offset(vma, start);
|
||||
max = min + (end - start) - 1;
|
||||
|
||||
spin_lock(&uprobes_treelock);
|
||||
read_lock(&uprobes_treelock);
|
||||
n = find_node_in_range(inode, min, max);
|
||||
if (n) {
|
||||
for (t = n; t; t = rb_prev(t)) {
|
||||
@ -1316,7 +1316,7 @@ static void build_probe_list(struct inode *inode,
|
||||
get_uprobe(u);
|
||||
}
|
||||
}
|
||||
spin_unlock(&uprobes_treelock);
|
||||
read_unlock(&uprobes_treelock);
|
||||
}
|
||||
|
||||
/* @vma contains reference counter, not the probed instruction. */
|
||||
@ -1407,9 +1407,9 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
|
||||
min = vaddr_to_offset(vma, start);
|
||||
max = min + (end - start) - 1;
|
||||
|
||||
spin_lock(&uprobes_treelock);
|
||||
read_lock(&uprobes_treelock);
|
||||
n = find_node_in_range(inode, min, max);
|
||||
spin_unlock(&uprobes_treelock);
|
||||
read_unlock(&uprobes_treelock);
|
||||
|
||||
return !!n;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user