sh: Optimise FDE/CIE lookup by using red-black trees
Now that the DWARF unwinder is being used to provide perf callstacks unwinding speed is an issue. It is no longer being used in exceptional circumstances where we don't care about runtime performance, e.g. when panicing, so it makes sense improve performance is possible. With this patch I saw a 42% improvement in unwind time when calling return_address(1). Greater improvements will be seen as the number of levels unwound increases as each unwind is now cheaper. Note that insertion time has doubled but that's just the price we pay for keeping the trees balanced. However, this is a one-time cost for kernel boot/module load and so the improvements in lookup time dominate the extra time we spend keeping the trees balanced. Signed-off-by: Matt Fleming <matt@console-pimps.org> Signed-off-by: Paul Mundt <lethal@linux-sh.org>
This commit is contained in:
parent
1af0b2fc67
commit
858918b77b
@ -243,16 +243,13 @@ struct dwarf_cie {
|
||||
|
||||
unsigned long cie_pointer;
|
||||
|
||||
struct list_head link;
|
||||
|
||||
unsigned long flags;
|
||||
#define DWARF_CIE_Z_AUGMENTATION (1 << 0)
|
||||
|
||||
/*
|
||||
* 'mod' will be non-NULL if this CIE came from a module's
|
||||
* .eh_frame section.
|
||||
*/
|
||||
struct module *mod;
|
||||
/* linked-list entry if this CIE is from a module */
|
||||
struct list_head link;
|
||||
|
||||
struct rb_node node;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -266,13 +263,11 @@ struct dwarf_fde {
|
||||
unsigned long address_range;
|
||||
unsigned char *instructions;
|
||||
unsigned char *end;
|
||||
|
||||
/* linked-list entry if this FDE is from a module */
|
||||
struct list_head link;
|
||||
|
||||
/*
|
||||
* 'mod' will be non-NULL if this FDE came from a module's
|
||||
* .eh_frame section.
|
||||
*/
|
||||
struct module *mod;
|
||||
struct rb_node node;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -1,7 +1,22 @@
|
||||
#ifndef _ASM_SH_MODULE_H
|
||||
#define _ASM_SH_MODULE_H
|
||||
|
||||
#include <asm-generic/module.h>
|
||||
struct mod_arch_specific {
|
||||
#ifdef CONFIG_DWARF_UNWINDER
|
||||
struct list_head fde_list;
|
||||
struct list_head cie_list;
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_64BIT
|
||||
#define Elf_Shdr Elf64_Shdr
|
||||
#define Elf_Sym Elf64_Sym
|
||||
#define Elf_Ehdr Elf64_Ehdr
|
||||
#else
|
||||
#define Elf_Shdr Elf32_Shdr
|
||||
#define Elf_Sym Elf32_Sym
|
||||
#define Elf_Ehdr Elf32_Ehdr
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_LITTLE_ENDIAN
|
||||
# ifdef CONFIG_CPU_SH2
|
||||
|
@ -39,10 +39,10 @@ static mempool_t *dwarf_frame_pool;
|
||||
static struct kmem_cache *dwarf_reg_cachep;
|
||||
static mempool_t *dwarf_reg_pool;
|
||||
|
||||
static LIST_HEAD(dwarf_cie_list);
|
||||
static struct rb_root cie_root;
|
||||
static DEFINE_SPINLOCK(dwarf_cie_lock);
|
||||
|
||||
static LIST_HEAD(dwarf_fde_list);
|
||||
static struct rb_root fde_root;
|
||||
static DEFINE_SPINLOCK(dwarf_fde_lock);
|
||||
|
||||
static struct dwarf_cie *cached_cie;
|
||||
@ -301,7 +301,8 @@ static inline int dwarf_entry_len(char *addr, unsigned long *len)
|
||||
*/
|
||||
static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
|
||||
{
|
||||
struct dwarf_cie *cie;
|
||||
struct rb_node **rb_node = &cie_root.rb_node;
|
||||
struct dwarf_cie *cie = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dwarf_cie_lock, flags);
|
||||
@ -315,16 +316,24 @@ static struct dwarf_cie *dwarf_lookup_cie(unsigned long cie_ptr)
|
||||
goto out;
|
||||
}
|
||||
|
||||
list_for_each_entry(cie, &dwarf_cie_list, link) {
|
||||
if (cie->cie_pointer == cie_ptr) {
|
||||
cached_cie = cie;
|
||||
break;
|
||||
while (*rb_node) {
|
||||
struct dwarf_cie *cie_tmp;
|
||||
|
||||
cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
|
||||
BUG_ON(!cie_tmp);
|
||||
|
||||
if (cie_ptr == cie_tmp->cie_pointer) {
|
||||
cie = cie_tmp;
|
||||
cached_cie = cie_tmp;
|
||||
goto out;
|
||||
} else {
|
||||
if (cie_ptr < cie_tmp->cie_pointer)
|
||||
rb_node = &(*rb_node)->rb_left;
|
||||
else
|
||||
rb_node = &(*rb_node)->rb_right;
|
||||
}
|
||||
}
|
||||
|
||||
/* Couldn't find the entry in the list. */
|
||||
if (&cie->link == &dwarf_cie_list)
|
||||
cie = NULL;
|
||||
out:
|
||||
spin_unlock_irqrestore(&dwarf_cie_lock, flags);
|
||||
return cie;
|
||||
@ -336,25 +345,34 @@ out:
|
||||
*/
|
||||
struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
|
||||
{
|
||||
struct dwarf_fde *fde;
|
||||
struct rb_node **rb_node = &fde_root.rb_node;
|
||||
struct dwarf_fde *fde = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dwarf_fde_lock, flags);
|
||||
|
||||
list_for_each_entry(fde, &dwarf_fde_list, link) {
|
||||
unsigned long start, end;
|
||||
while (*rb_node) {
|
||||
struct dwarf_fde *fde_tmp;
|
||||
unsigned long tmp_start, tmp_end;
|
||||
|
||||
start = fde->initial_location;
|
||||
end = fde->initial_location + fde->address_range;
|
||||
fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
|
||||
BUG_ON(!fde_tmp);
|
||||
|
||||
if (pc >= start && pc < end)
|
||||
break;
|
||||
tmp_start = fde_tmp->initial_location;
|
||||
tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
|
||||
|
||||
if (pc < tmp_start) {
|
||||
rb_node = &(*rb_node)->rb_left;
|
||||
} else {
|
||||
if (pc < tmp_end) {
|
||||
fde = fde_tmp;
|
||||
goto out;
|
||||
} else
|
||||
rb_node = &(*rb_node)->rb_right;
|
||||
}
|
||||
}
|
||||
|
||||
/* Couldn't find the entry in the list. */
|
||||
if (&fde->link == &dwarf_fde_list)
|
||||
fde = NULL;
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&dwarf_fde_lock, flags);
|
||||
|
||||
return fde;
|
||||
@ -552,8 +570,8 @@ extern void ret_from_irq(void);
|
||||
* on the callstack. Each of the lower (older) stack frames are
|
||||
* linked via the "prev" member.
|
||||
*/
|
||||
struct dwarf_frame * dwarf_unwind_stack(unsigned long pc,
|
||||
struct dwarf_frame *prev)
|
||||
struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
|
||||
struct dwarf_frame *prev)
|
||||
{
|
||||
struct dwarf_frame *frame;
|
||||
struct dwarf_cie *cie;
|
||||
@ -708,6 +726,8 @@ bail:
|
||||
static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
|
||||
unsigned char *end, struct module *mod)
|
||||
{
|
||||
struct rb_node **rb_node = &cie_root.rb_node;
|
||||
struct rb_node *parent;
|
||||
struct dwarf_cie *cie;
|
||||
unsigned long flags;
|
||||
int count;
|
||||
@ -802,11 +822,30 @@ static int dwarf_parse_cie(void *entry, void *p, unsigned long len,
|
||||
cie->initial_instructions = p;
|
||||
cie->instructions_end = end;
|
||||
|
||||
cie->mod = mod;
|
||||
|
||||
/* Add to list */
|
||||
spin_lock_irqsave(&dwarf_cie_lock, flags);
|
||||
list_add_tail(&cie->link, &dwarf_cie_list);
|
||||
|
||||
while (*rb_node) {
|
||||
struct dwarf_cie *cie_tmp;
|
||||
|
||||
cie_tmp = rb_entry(*rb_node, struct dwarf_cie, node);
|
||||
|
||||
parent = *rb_node;
|
||||
|
||||
if (cie->cie_pointer < cie_tmp->cie_pointer)
|
||||
rb_node = &parent->rb_left;
|
||||
else if (cie->cie_pointer >= cie_tmp->cie_pointer)
|
||||
rb_node = &parent->rb_right;
|
||||
else
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
rb_link_node(&cie->node, parent, rb_node);
|
||||
rb_insert_color(&cie->node, &cie_root);
|
||||
|
||||
if (mod != NULL)
|
||||
list_add_tail(&cie->link, &mod->arch.cie_list);
|
||||
|
||||
spin_unlock_irqrestore(&dwarf_cie_lock, flags);
|
||||
|
||||
return 0;
|
||||
@ -816,6 +855,8 @@ static int dwarf_parse_fde(void *entry, u32 entry_type,
|
||||
void *start, unsigned long len,
|
||||
unsigned char *end, struct module *mod)
|
||||
{
|
||||
struct rb_node **rb_node = &fde_root.rb_node;
|
||||
struct rb_node *parent;
|
||||
struct dwarf_fde *fde;
|
||||
struct dwarf_cie *cie;
|
||||
unsigned long flags;
|
||||
@ -863,11 +904,38 @@ static int dwarf_parse_fde(void *entry, u32 entry_type,
|
||||
fde->instructions = p;
|
||||
fde->end = end;
|
||||
|
||||
fde->mod = mod;
|
||||
|
||||
/* Add to list. */
|
||||
spin_lock_irqsave(&dwarf_fde_lock, flags);
|
||||
list_add_tail(&fde->link, &dwarf_fde_list);
|
||||
|
||||
while (*rb_node) {
|
||||
struct dwarf_fde *fde_tmp;
|
||||
unsigned long tmp_start, tmp_end;
|
||||
unsigned long start, end;
|
||||
|
||||
fde_tmp = rb_entry(*rb_node, struct dwarf_fde, node);
|
||||
|
||||
start = fde->initial_location;
|
||||
end = fde->initial_location + fde->address_range;
|
||||
|
||||
tmp_start = fde_tmp->initial_location;
|
||||
tmp_end = fde_tmp->initial_location + fde_tmp->address_range;
|
||||
|
||||
parent = *rb_node;
|
||||
|
||||
if (start < tmp_start)
|
||||
rb_node = &parent->rb_left;
|
||||
else if (start >= tmp_end)
|
||||
rb_node = &parent->rb_right;
|
||||
else
|
||||
WARN_ON(1);
|
||||
}
|
||||
|
||||
rb_link_node(&fde->node, parent, rb_node);
|
||||
rb_insert_color(&fde->node, &fde_root);
|
||||
|
||||
if (mod != NULL)
|
||||
list_add_tail(&fde->link, &mod->arch.fde_list);
|
||||
|
||||
spin_unlock_irqrestore(&dwarf_fde_lock, flags);
|
||||
|
||||
return 0;
|
||||
@ -912,19 +980,29 @@ static struct unwinder dwarf_unwinder = {
|
||||
|
||||
static void dwarf_unwinder_cleanup(void)
|
||||
{
|
||||
struct dwarf_cie *cie, *cie_tmp;
|
||||
struct dwarf_fde *fde, *fde_tmp;
|
||||
struct rb_node **fde_rb_node = &fde_root.rb_node;
|
||||
struct rb_node **cie_rb_node = &cie_root.rb_node;
|
||||
|
||||
/*
|
||||
* Deallocate all the memory allocated for the DWARF unwinder.
|
||||
* Traverse all the FDE/CIE lists and remove and free all the
|
||||
* memory associated with those data structures.
|
||||
*/
|
||||
list_for_each_entry_safe(cie, cie_tmp, &dwarf_cie_list, link)
|
||||
kfree(cie);
|
||||
while (*fde_rb_node) {
|
||||
struct dwarf_fde *fde;
|
||||
|
||||
list_for_each_entry_safe(fde, fde_tmp, &dwarf_fde_list, link)
|
||||
fde = rb_entry(*fde_rb_node, struct dwarf_fde, node);
|
||||
rb_erase(*fde_rb_node, &fde_root);
|
||||
kfree(fde);
|
||||
}
|
||||
|
||||
while (*cie_rb_node) {
|
||||
struct dwarf_cie *cie;
|
||||
|
||||
cie = rb_entry(*cie_rb_node, struct dwarf_cie, node);
|
||||
rb_erase(*cie_rb_node, &cie_root);
|
||||
kfree(cie);
|
||||
}
|
||||
|
||||
kmem_cache_destroy(dwarf_reg_cachep);
|
||||
kmem_cache_destroy(dwarf_frame_cachep);
|
||||
@ -1024,6 +1102,8 @@ int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
||||
|
||||
/* Did we find the .eh_frame section? */
|
||||
if (i != hdr->e_shnum) {
|
||||
INIT_LIST_HEAD(&me->arch.cie_list);
|
||||
INIT_LIST_HEAD(&me->arch.fde_list);
|
||||
err = dwarf_parse_section((char *)start, (char *)end, me);
|
||||
if (err) {
|
||||
printk(KERN_WARNING "%s: failed to parse DWARF info\n",
|
||||
@ -1044,38 +1124,26 @@ int module_dwarf_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
|
||||
*/
|
||||
void module_dwarf_cleanup(struct module *mod)
|
||||
{
|
||||
struct dwarf_fde *fde;
|
||||
struct dwarf_cie *cie;
|
||||
struct dwarf_fde *fde, *ftmp;
|
||||
struct dwarf_cie *cie, *ctmp;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dwarf_cie_lock, flags);
|
||||
|
||||
again_cie:
|
||||
list_for_each_entry(cie, &dwarf_cie_list, link) {
|
||||
if (cie->mod == mod)
|
||||
break;
|
||||
}
|
||||
|
||||
if (&cie->link != &dwarf_cie_list) {
|
||||
list_for_each_entry_safe(cie, ctmp, &mod->arch.cie_list, link) {
|
||||
list_del(&cie->link);
|
||||
rb_erase(&cie->node, &cie_root);
|
||||
kfree(cie);
|
||||
goto again_cie;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&dwarf_cie_lock, flags);
|
||||
|
||||
spin_lock_irqsave(&dwarf_fde_lock, flags);
|
||||
|
||||
again_fde:
|
||||
list_for_each_entry(fde, &dwarf_fde_list, link) {
|
||||
if (fde->mod == mod)
|
||||
break;
|
||||
}
|
||||
|
||||
if (&fde->link != &dwarf_fde_list) {
|
||||
list_for_each_entry_safe(fde, ftmp, &mod->arch.fde_list, link) {
|
||||
list_del(&fde->link);
|
||||
rb_erase(&fde->node, &fde_root);
|
||||
kfree(fde);
|
||||
goto again_fde;
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&dwarf_fde_lock, flags);
|
||||
@ -1094,8 +1162,6 @@ again_fde:
|
||||
static int __init dwarf_unwinder_init(void)
|
||||
{
|
||||
int err;
|
||||
INIT_LIST_HEAD(&dwarf_cie_list);
|
||||
INIT_LIST_HEAD(&dwarf_fde_list);
|
||||
|
||||
dwarf_frame_cachep = kmem_cache_create("dwarf_frames",
|
||||
sizeof(struct dwarf_frame), 0,
|
||||
|
Loading…
Reference in New Issue
Block a user