1a986518b8
Extend the bpf hashmap docs to include a brief description of the internals of the LRU map type (setting appropriate API expectations), including the original commit message from Martin and a variant on the graph that I had presented during my Linux Plumbers Conference 2022 talk on "Pressure feedback for LRU map types"[0]. The node names in the dot file correspond roughly to the functions where the logic for those decisions or steps is defined, to help curious developers to cross-reference and update this logic if the details of the LRU implementation ever differ from this description. [0] https://lpc.events/event/16/contributions/1368/ Signed-off-by: Joe Stringer <joe@isovalent.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com> Acked-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/20230422172054.3355436-2-joe@isovalent.com
173 lines
7.4 KiB
Plaintext
173 lines
7.4 KiB
Plaintext
// SPDX-License-Identifier: GPL-2.0-only
|
|
// Copyright (C) 2022-2023 Isovalent, Inc.
|
|
digraph {
|
|
node [colorscheme=accent4,style=filled] # Apply colorscheme to all nodes
|
|
graph [splines=ortho, nodesep=1]
|
|
|
|
subgraph cluster_key {
|
|
label = "Key\n(locks held during operation)";
|
|
rankdir = TB;
|
|
|
|
remote_lock [shape=rectangle,fillcolor=4,label="remote CPU LRU lock"]
|
|
hash_lock [shape=rectangle,fillcolor=3,label="hashtab lock"]
|
|
lru_lock [shape=rectangle,fillcolor=2,label="LRU lock"]
|
|
local_lock [shape=rectangle,fillcolor=1,label="local CPU LRU lock"]
|
|
no_lock [shape=rectangle,label="no locks held"]
|
|
}
|
|
|
|
begin [shape=oval,label="begin\nbpf_map_update()"]
|
|
|
|
// Nodes below with an 'fn_' prefix are roughly labeled by the C function
|
|
// names that initiate the corresponding logic in kernel/bpf/bpf_lru_list.c.
|
|
// Number suffixes and errno suffixes handle subsections of the corresponding
|
|
// logic in the function as of the writing of this dot.
|
|
|
|
// cf. __local_list_pop_free() / bpf_percpu_lru_pop_free()
|
|
local_freelist_check [shape=diamond,fillcolor=1,
|
|
label="Local freelist\nnode available?"];
|
|
use_local_node [shape=rectangle,
|
|
label="Use node owned\nby this CPU"]
|
|
|
|
// cf. bpf_lru_pop_free()
|
|
common_lru_check [shape=diamond,
|
|
label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
|
|
|
|
fn_bpf_lru_list_pop_free_to_local [shape=rectangle,fillcolor=2,
|
|
label="Flush local pending,
|
|
Rotate Global list, move
|
|
LOCAL_FREE_TARGET
|
|
from global -> local"]
|
|
// Also corresponds to:
|
|
// fn__local_list_flush()
|
|
// fn_bpf_lru_list_rotate()
|
|
fn___bpf_lru_node_move_to_free[shape=diamond,fillcolor=2,
|
|
label="Able to free\nLOCAL_FREE_TARGET\nnodes?"]
|
|
|
|
fn___bpf_lru_list_shrink_inactive [shape=rectangle,fillcolor=3,
|
|
label="Shrink inactive list
|
|
up to remaining
|
|
LOCAL_FREE_TARGET
|
|
(global LRU -> local)"]
|
|
fn___bpf_lru_list_shrink [shape=diamond,fillcolor=2,
|
|
label="> 0 entries in\nlocal free list?"]
|
|
fn___bpf_lru_list_shrink2 [shape=rectangle,fillcolor=2,
|
|
label="Steal one node from
|
|
inactive, or if empty,
|
|
from active global list"]
|
|
fn___bpf_lru_list_shrink3 [shape=rectangle,fillcolor=3,
|
|
label="Try to remove\nnode from hashtab"]
|
|
|
|
local_freelist_check2 [shape=diamond,label="Htab removal\nsuccessful?"]
|
|
common_lru_check2 [shape=diamond,
|
|
label="Map created with\ncommon LRU?\n(!BPF_F_NO_COMMON_LRU)"];
|
|
|
|
subgraph cluster_remote_lock {
|
|
label = "Iterate through CPUs\n(start from current)";
|
|
style = dashed;
|
|
rankdir=LR;
|
|
|
|
local_freelist_check5 [shape=diamond,fillcolor=4,
|
|
label="Steal a node from\nper-cpu freelist?"]
|
|
local_freelist_check6 [shape=rectangle,fillcolor=4,
|
|
label="Steal a node from
|
|
(1) Unreferenced pending, or
|
|
(2) Any pending node"]
|
|
local_freelist_check7 [shape=rectangle,fillcolor=3,
|
|
label="Try to remove\nnode from hashtab"]
|
|
fn_htab_lru_map_update_elem [shape=diamond,
|
|
label="Stole node\nfrom remote\nCPU?"]
|
|
fn_htab_lru_map_update_elem2 [shape=diamond,label="Iterated\nall CPUs?"]
|
|
// Also corresponds to:
|
|
// use_local_node()
|
|
// fn__local_list_pop_pending()
|
|
}
|
|
|
|
fn_bpf_lru_list_pop_free_to_local2 [shape=rectangle,
|
|
label="Use node that was\nnot recently referenced"]
|
|
local_freelist_check4 [shape=rectangle,
|
|
label="Use node that was\nactively referenced\nin global list"]
|
|
fn_htab_lru_map_update_elem_ENOMEM [shape=oval,label="return -ENOMEM"]
|
|
fn_htab_lru_map_update_elem3 [shape=rectangle,
|
|
label="Use node that was\nactively referenced\nin (another?) CPU's cache"]
|
|
fn_htab_lru_map_update_elem4 [shape=rectangle,fillcolor=3,
|
|
label="Update hashmap\nwith new element"]
|
|
fn_htab_lru_map_update_elem5 [shape=oval,label="return 0"]
|
|
fn_htab_lru_map_update_elem_EBUSY [shape=oval,label="return -EBUSY"]
|
|
fn_htab_lru_map_update_elem_EEXIST [shape=oval,label="return -EEXIST"]
|
|
fn_htab_lru_map_update_elem_ENOENT [shape=oval,label="return -ENOENT"]
|
|
|
|
begin -> local_freelist_check
|
|
local_freelist_check -> use_local_node [xlabel="Y"]
|
|
local_freelist_check -> common_lru_check [xlabel="N"]
|
|
common_lru_check -> fn_bpf_lru_list_pop_free_to_local [xlabel="Y"]
|
|
common_lru_check -> fn___bpf_lru_list_shrink_inactive [xlabel="N"]
|
|
fn_bpf_lru_list_pop_free_to_local -> fn___bpf_lru_node_move_to_free
|
|
fn___bpf_lru_node_move_to_free ->
|
|
fn_bpf_lru_list_pop_free_to_local2 [xlabel="Y"]
|
|
fn___bpf_lru_node_move_to_free ->
|
|
fn___bpf_lru_list_shrink_inactive [xlabel="N"]
|
|
fn___bpf_lru_list_shrink_inactive -> fn___bpf_lru_list_shrink
|
|
fn___bpf_lru_list_shrink -> fn_bpf_lru_list_pop_free_to_local2 [xlabel = "Y"]
|
|
fn___bpf_lru_list_shrink -> fn___bpf_lru_list_shrink2 [xlabel="N"]
|
|
fn___bpf_lru_list_shrink2 -> fn___bpf_lru_list_shrink3
|
|
fn___bpf_lru_list_shrink3 -> local_freelist_check2
|
|
local_freelist_check2 -> local_freelist_check4 [xlabel = "Y"]
|
|
local_freelist_check2 -> common_lru_check2 [xlabel = "N"]
|
|
common_lru_check2 -> local_freelist_check5 [xlabel = "Y"]
|
|
common_lru_check2 -> fn_htab_lru_map_update_elem_ENOMEM [xlabel = "N"]
|
|
local_freelist_check5 -> fn_htab_lru_map_update_elem [xlabel = "Y"]
|
|
local_freelist_check5 -> local_freelist_check6 [xlabel = "N"]
|
|
local_freelist_check6 -> local_freelist_check7
|
|
local_freelist_check7 -> fn_htab_lru_map_update_elem
|
|
|
|
fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem3 [xlabel = "Y"]
|
|
fn_htab_lru_map_update_elem -> fn_htab_lru_map_update_elem2 [xlabel = "N"]
|
|
fn_htab_lru_map_update_elem2 ->
|
|
fn_htab_lru_map_update_elem_ENOMEM [xlabel = "Y"]
|
|
fn_htab_lru_map_update_elem2 -> local_freelist_check5 [xlabel = "N"]
|
|
fn_htab_lru_map_update_elem3 -> fn_htab_lru_map_update_elem4
|
|
|
|
use_local_node -> fn_htab_lru_map_update_elem4
|
|
fn_bpf_lru_list_pop_free_to_local2 -> fn_htab_lru_map_update_elem4
|
|
local_freelist_check4 -> fn_htab_lru_map_update_elem4
|
|
|
|
fn_htab_lru_map_update_elem4 -> fn_htab_lru_map_update_elem5 [headlabel="Success"]
|
|
fn_htab_lru_map_update_elem4 ->
|
|
fn_htab_lru_map_update_elem_EBUSY [xlabel="Hashtab lock failed"]
|
|
fn_htab_lru_map_update_elem4 ->
|
|
fn_htab_lru_map_update_elem_EEXIST [xlabel="BPF_EXIST set and\nkey already exists"]
|
|
fn_htab_lru_map_update_elem4 ->
|
|
fn_htab_lru_map_update_elem_ENOENT [headlabel="BPF_NOEXIST set\nand no such entry"]
|
|
|
|
// Create invisible pad nodes to line up various nodes
|
|
pad0 [style=invis]
|
|
pad1 [style=invis]
|
|
pad2 [style=invis]
|
|
pad3 [style=invis]
|
|
pad4 [style=invis]
|
|
|
|
// Line up the key with the top of the graph
|
|
no_lock -> local_lock [style=invis]
|
|
local_lock -> lru_lock [style=invis]
|
|
lru_lock -> hash_lock [style=invis]
|
|
hash_lock -> remote_lock [style=invis]
|
|
remote_lock -> local_freelist_check5 [style=invis]
|
|
remote_lock -> fn___bpf_lru_list_shrink [style=invis]
|
|
|
|
// Line up return code nodes at the bottom of the graph
|
|
fn_htab_lru_map_update_elem -> pad0 [style=invis]
|
|
pad0 -> pad1 [style=invis]
|
|
pad1 -> pad2 [style=invis]
|
|
//pad2-> fn_htab_lru_map_update_elem_ENOMEM [style=invis]
|
|
fn_htab_lru_map_update_elem4 -> pad3 [style=invis]
|
|
pad3 -> fn_htab_lru_map_update_elem5 [style=invis]
|
|
pad3 -> fn_htab_lru_map_update_elem_EBUSY [style=invis]
|
|
pad3 -> fn_htab_lru_map_update_elem_EEXIST [style=invis]
|
|
pad3 -> fn_htab_lru_map_update_elem_ENOENT [style=invis]
|
|
|
|
// Reduce diagram width by forcing some nodes to appear above others
|
|
local_freelist_check4 -> fn_htab_lru_map_update_elem3 [style=invis]
|
|
common_lru_check2 -> pad4 [style=invis]
|
|
pad4 -> local_freelist_check5 [style=invis]
|
|
}
|