// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2021 Facebook
// Copyright (c) 2021 Google
# include "vmlinux.h"
# include <bpf/bpf_helpers.h>
# include <bpf/bpf_tracing.h>
# include <bpf/bpf_core_read.h>
# define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary
# define MAX_EVENTS 32 // max events per cgroup: arbitrary
// NOTE: many of the maps and global variables are modified before loading
//       from userspace (the perf tool) using the skeleton helpers.
// single set of global perf events to measure
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(int));
	__uint(max_entries, 1);
} events SEC(".maps");
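// (userspace is expected to store one perf event fd per event/cpu pair in
//  this array; the key computation below assumes idx * num_cpus + cpu)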
// from cgroup id to event index
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(key_size, sizeof(__u64));
	__uint(value_size, sizeof(__u32));
	__uint(max_entries, 1);
} cgrp_idx SEC(".maps");
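// (filled from userspace with the id of each monitored cgroup)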
// per-cpu event snapshots to calculate delta
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct bpf_perf_event_value));
} prev_readings SEC(".maps");
// aggregated event values for each cgroup (per-cpu)
// will be read from userspace
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct bpf_perf_event_value));
} cgrp_readings SEC(".maps");
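/*
 * Illustrative sketch (not part of this BPF object): userspace sizes the
 * maps above through the generated skeleton before load.  The exact call
 * sites live in the perf tool; the skeleton field names here are assumptions.
 *
 *	bpf_map__set_max_entries(skel->maps.events, num_cpus * num_events);
 *	bpf_map__set_max_entries(skel->maps.cgrp_idx, num_cgroups);
 *	bpf_map__set_max_entries(skel->maps.prev_readings, num_events);
 *	bpf_map__set_max_entries(skel->maps.cgrp_readings,
 *				 num_cgroups * num_events);
 */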
/* new kernel cgroup definition */
struct cgroup___new {
	int level;
	struct cgroup *ancestors[];
} __attribute__((preserve_access_index));

/* old kernel cgroup definition */
struct cgroup___old {
	int level;
	u64 ancestor_ids[];
} __attribute__((preserve_access_index));
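/*
 * Only one of the two layouts above matches the running kernel: newer
 * kernels expose struct cgroup *ancestors[] where older ones had
 * u64 ancestor_ids[].  CO-RE relocation lets get_cgroup_v1_ancestor_id()
 * below test which field exists at load time and read through the
 * matching definition.
 */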
const volatile __u32 num_events = 1;
const volatile __u32 num_cpus = 1;

int enabled = 0;
int use_cgroup_v2 = 0;
int perf_subsys_id = -1;
static inline __u64 get_cgroup_v1_ancestor_id(struct cgroup *cgrp, int level)
{
	/* recast pointer to capture new type for compiler */
	struct cgroup___new *cgrp_new = (void *)cgrp;

	if (bpf_core_field_exists(cgrp_new->ancestors)) {
		return BPF_CORE_READ(cgrp_new, ancestors[level], kn, id);
	} else {
		/* recast pointer to capture old type for compiler */
		struct cgroup___old *cgrp_old = (void *)cgrp;

		return BPF_CORE_READ(cgrp_old, ancestor_ids[level]);
	}
}
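/*
 * Walk the current task's perf_event cgroup hierarchy (v1) and collect
 * the cgrp_idx map index of every monitored ancestor.  Returns how many
 * entries were written to cgrps[].
 */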
static inline int get_cgroup_v1_idx(__u32 *cgrps, int size)
{
	struct task_struct *p = (void *)bpf_get_current_task();
	struct cgroup *cgrp;
	register int i = 0;
	__u32 *elem;
	int level;
	int cnt;
	if (perf_subsys_id == -1) {
#if __has_builtin(__builtin_preserve_enum_value)
		perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id,
						     perf_event_cgrp_id);
#else
		perf_subsys_id = perf_event_cgrp_id;
#endif
	}

	cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup);
	level = BPF_CORE_READ(cgrp, level);

	for (cnt = 0; i < MAX_LEVELS; i++) {
		__u64 cgrp_id;

		if (i > level)
			break;

		// convert cgroup-id to a map index
		cgrp_id = get_cgroup_v1_ancestor_id(cgrp, i);
		elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
		if (!elem)
			continue;

		cgrps[cnt++] = *elem;
		if (cnt == size)
			break;
	}

	return cnt;
}
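/*
 * Same walk for cgroup v2: the kernel hands back each ancestor's id
 * directly, so no task_struct chasing is needed.
 */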
static inline int get_cgroup_v2_idx(__u32 *cgrps, int size)
{
	register int i = 0;
	__u32 *elem;
	int cnt;

	for (cnt = 0; i < MAX_LEVELS; i++) {
		__u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i);

		if (cgrp_id == 0)
			break;

		// convert cgroup-id to a map index
		elem = bpf_map_lookup_elem(&cgrp_idx, &cgrp_id);
		if (!elem)
			continue;

		cgrps[cnt++] = *elem;
		if (cnt == size)
			break;
	}

	return cnt;
}
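/*
 * Read the current counter value for every event, compute the delta
 * against the previous per-cpu snapshot, and charge that delta to each
 * cgroup in the current task's hierarchy.  Map key layouts used below:
 *   events:        idx * num_cpus + cpu
 *   prev_readings: idx
 *   cgrp_readings: cgrp * num_events + idx
 */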
static int bperf_cgroup_count(void)
{
	register __u32 idx = 0; // to have it in a register to pass BPF verifier
	register int c = 0;
	struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val;
	__u32 cpu = bpf_get_smp_processor_id();
	__u32 cgrp_idx[MAX_LEVELS];
	int cgrp_cnt;
	__u32 key, cgrp;
	long err;

	if (use_cgroup_v2)
		cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS);
	else
		cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS);

	for ( ; idx < MAX_EVENTS; idx++) {
		if (idx == num_events)
			break;

		// XXX: do not pass idx directly (for verifier)
		key = idx;
		// this is a per-cpu array for the diff
		prev_val = bpf_map_lookup_elem(&prev_readings, &key);
		if (!prev_val) {
			val.counter = val.enabled = val.running = 0;
			bpf_map_update_elem(&prev_readings, &key, &val, BPF_ANY);

			prev_val = bpf_map_lookup_elem(&prev_readings, &key);
			if (!prev_val)
				continue;
		}

		// read from the global perf_event array
		key = idx * num_cpus + cpu;
		err = bpf_perf_event_read_value(&events, key, &val, sizeof(val));
		if (err)
			continue;

		if (enabled) {
			delta.counter = val.counter - prev_val->counter;
			delta.enabled = val.enabled - prev_val->enabled;
			delta.running = val.running - prev_val->running;

			for (c = 0; c < MAX_LEVELS; c++) {
				if (c == cgrp_cnt)
					break;

				cgrp = cgrp_idx[c];

				// aggregate the result by cgroup
				key = cgrp * num_events + idx;
				cgrp_val = bpf_map_lookup_elem(&cgrp_readings, &key);
				if (cgrp_val) {
					cgrp_val->counter += delta.counter;
					cgrp_val->enabled += delta.enabled;
					cgrp_val->running += delta.running;
				} else {
					bpf_map_update_elem(&cgrp_readings, &key,
							    &delta, BPF_ANY);
				}
			}
		}

		*prev_val = val;
	}
	return 0;
}
// This will be attached to cgroup-switches event for each cpu
SEC ( " perf_event " )
int BPF_PROG(on_cgrp_switch)
{
	return bperf_cgroup_count();
}
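/*
 * Illustrative sketch (assumed userspace side, not part of this object):
 * the perf tool opens a software "cgroup-switches" event per cpu and
 * attaches on_cgrp_switch to it, roughly:
 *
 *	bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
 *				       cgrp_switch_fd_for_cpu);
 *
 * trigger_read below hooks sched_switch so userspace can force a fresh
 * snapshot when it wants to read the counters.
 */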
SEC ( " raw_tp/sched_switch " )
int BPF_PROG ( trigger_read )
{
return bperf_cgroup_count ( ) ;
}
char LICENSE[] SEC("license") = "Dual BSD/GPL";