perf record: Handle argument change in sched_switch
Recently the sched_switch tracepoint gained a new argument for prev_state, but it's hard to handle that change inside a BPF program. Instead, we can check the function prototype in BTF before loading the program.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20220518224725.742882-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
		
				
					committed by
					
						 Arnaldo Carvalho de Melo
						Arnaldo Carvalho de Melo
					
				
			
			
				
	
			
			
			
						parent
						
							10742d0c07
						
					
				
				
					commit
					b36888f71c
				
			| @@ -89,6 +89,33 @@ static void off_cpu_finish(void *arg __maybe_unused) | ||||
| 	off_cpu_bpf__destroy(skel); | ||||
| } | ||||
| 
 | ||||
| /* v5.18 kernel added prev_state arg, so it needs to check the signature */ | ||||
| static void check_sched_switch_args(void) | ||||
| { | ||||
| 	const struct btf *btf = bpf_object__btf(skel->obj); | ||||
| 	const struct btf_type *t1, *t2, *t3; | ||||
| 	u32 type_id; | ||||
| 
 | ||||
| 	type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch", | ||||
| 					 BTF_KIND_TYPEDEF); | ||||
| 	if ((s32)type_id < 0) | ||||
| 		return; | ||||
| 
 | ||||
| 	t1 = btf__type_by_id(btf, type_id); | ||||
| 	if (t1 == NULL) | ||||
| 		return; | ||||
| 
 | ||||
| 	t2 = btf__type_by_id(btf, t1->type); | ||||
| 	if (t2 == NULL || !btf_is_ptr(t2)) | ||||
| 		return; | ||||
| 
 | ||||
| 	t3 = btf__type_by_id(btf, t2->type); | ||||
| 	if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 4) { | ||||
| 		/* new format: pass prev_state as 4th arg */ | ||||
| 		skel->rodata->has_prev_state = true; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| int off_cpu_prepare(struct evlist *evlist, struct target *target) | ||||
| { | ||||
| 	int err, fd, i; | ||||
| @@ -117,6 +144,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target) | ||||
| 	} | ||||
| 
 | ||||
| 	set_max_rlimit(); | ||||
| 	check_sched_switch_args(); | ||||
| 
 | ||||
| 	err = off_cpu_bpf__load(skel); | ||||
| 	if (err) { | ||||
|   | ||||
| @@ -72,6 +72,8 @@ int enabled = 0; | ||||
| int has_cpu = 0; | ||||
| int has_task = 0; | ||||
| 
 | ||||
| const volatile bool has_prev_state = false; | ||||
| 
 | ||||
| /*
 | ||||
|  * Old kernel used to call it task_struct->state and now it's '__state'. | ||||
|  * Use BPF CO-RE "ignored suffix rule" to deal with it like below: | ||||
| @@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state) | ||||
| 	return 1; | ||||
| } | ||||
| 
 | ||||
| SEC("tp_btf/sched_switch") | ||||
| int on_switch(u64 *ctx) | ||||
| static int off_cpu_stat(u64 *ctx, struct task_struct *prev, | ||||
| 			struct task_struct *next, int state) | ||||
| { | ||||
| 	__u64 ts; | ||||
| 	int state; | ||||
| 	__u32 stack_id; | ||||
| 	struct task_struct *prev, *next; | ||||
| 	struct tstamp_data *pelem; | ||||
| 
 | ||||
| 	if (!enabled) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	prev = (struct task_struct *)ctx[1]; | ||||
| 	next = (struct task_struct *)ctx[2]; | ||||
| 	state = get_task_state(prev); | ||||
| 
 | ||||
| 	ts = bpf_ktime_get_ns(); | ||||
| 
 | ||||
| 	if (!can_record(prev, state)) | ||||
| @@ -180,4 +173,24 @@ next: | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| SEC("tp_btf/sched_switch") | ||||
| int on_switch(u64 *ctx) | ||||
| { | ||||
| 	struct task_struct *prev, *next; | ||||
| 	int prev_state; | ||||
| 
 | ||||
| 	if (!enabled) | ||||
| 		return 0; | ||||
| 
 | ||||
| 	prev = (struct task_struct *)ctx[1]; | ||||
| 	next = (struct task_struct *)ctx[2]; | ||||
| 
 | ||||
| 	if (has_prev_state) | ||||
| 		prev_state = (int)ctx[3]; | ||||
| 	else | ||||
| 		prev_state = get_task_state(prev); | ||||
| 
 | ||||
| 	return off_cpu_stat(ctx, prev, next, prev_state); | ||||
| } | ||||
| 
 | ||||
| char LICENSE[] SEC("license") = "Dual BSD/GPL"; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user