From ffa84f806dbd4938f7aad3b0bb37a93bdf5188b1 Mon Sep 17 00:00:00 2001 From: Eunseon Lee Date: Mon, 16 Sep 2024 22:48:12 +0900 Subject: [PATCH 1/2] libbpf-tools/offcputime: Add multi process/thread support This is a test example. # ./offcputime -p 16,48 Tracing off-CPU time (us) of PID [16, 48]... Hit Ctrl-C to end. bpf_prog_a42aae11c0bc18f2_sched_switch bpf_prog_a42aae11c0bc18f2_sched_switch bpf_trace_run4 __traceiter_sched_switch __schedule schedule worker_thread kthread ret_from_fork ret_from_fork_asm - kworker/2:1 (48) 3353019 bpf_prog_a42aae11c0bc18f2_sched_switch bpf_prog_a42aae11c0bc18f2_sched_switch bpf_trace_run4 __traceiter_sched_switch __schedule schedule rcu_gp_kthread kthread ret_from_fork ret_from_fork_asm - rcu_preempt (16) 1720974 --- libbpf-tools/offcputime.bpf.c | 25 ++++++- libbpf-tools/offcputime.c | 136 ++++++++++++++++++++++++++++------ libbpf-tools/offcputime.h | 2 + 3 files changed, 135 insertions(+), 28 deletions(-) diff --git a/libbpf-tools/offcputime.bpf.c b/libbpf-tools/offcputime.bpf.c index cb20d5017165..ce1f51bf1bae 100644 --- a/libbpf-tools/offcputime.bpf.c +++ b/libbpf-tools/offcputime.bpf.c @@ -14,8 +14,8 @@ const volatile bool kernel_threads_only = false; const volatile bool user_threads_only = false; const volatile __u64 max_block_ns = -1; const volatile __u64 min_block_ns = 1; -const volatile pid_t targ_tgid = -1; -const volatile pid_t targ_pid = -1; +const volatile bool filter_by_tgid = false; +const volatile bool filter_by_pid = false; const volatile long state = -1; struct internal_key { @@ -42,11 +42,28 @@ struct { __uint(max_entries, MAX_ENTRIES); } info SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u8); + __uint(max_entries, MAX_PID_NR); +} tgids SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u8); + __uint(max_entries, MAX_TID_NR); +} pids SEC(".maps"); + static bool allow_record(struct task_struct *t) { - if (targ_tgid != -1 && targ_tgid != t->tgid) + u32 tgid = t->tgid; + u32 pid = t->pid; + + if (filter_by_tgid && !bpf_map_lookup_elem(&tgids, &tgid)) return false; - if (targ_pid != -1 && targ_pid != t->pid) + if (filter_by_pid && !bpf_map_lookup_elem(&pids, &pid)) return false; if (user_threads_only && t->flags & PF_KTHREAD) return false; diff --git a/libbpf-tools/offcputime.c b/libbpf-tools/offcputime.c index 9a4590ea94bb..cdb25c8ad42e 100644 --- a/libbpf-tools/offcputime.c +++ b/libbpf-tools/offcputime.c @@ -16,8 +16,8 @@ #include "trace_helpers.h" static struct env { - pid_t pid; - pid_t tid; + pid_t pids[MAX_PID_NR]; + pid_t tids[MAX_TID_NR]; bool user_threads_only; bool kernel_threads_only; int stack_storage_size; @@ -28,8 +28,6 @@ static struct env { int duration; bool verbose; } env = { - .pid = -1, - .tid = -1, .stack_storage_size = 1024, .perf_max_stack_depth = 127, .min_block_time = 1, @@ -52,8 +50,8 @@ const char argp_program_doc[] = " offcputime 5 # trace for 5 seconds only\n" " offcputime -m 1000 # trace only events that last more than 1000 usec\n" " offcputime -M 10000 # trace only events that last less than 10000 usec\n" -" offcputime -p 185 # only trace threads for PID 185\n" -" offcputime -t 188 # only trace thread 188\n" +" offcputime -p 185,175,165 # only trace threads for PID 185,175,165\n" +" offcputime -t 188,120,134 # only trace threads 188,120,134\n" " offcputime -u # only trace user threads (no kernel)\n" " offcputime -k # only trace kernel threads (no user)\n"; @@ -62,8 +60,8 @@ const char argp_program_doc[] = #define OPT_STATE 3 /* --state */ static const struct argp_option opts[] = { - { "pid", 'p', "PID", 0, "Trace this PID only", 0 }, - { "tid", 't', "TID", 0, "Trace this TID only", 0 }, + { "pid", 'p', "PID", 0, "Trace these PIDs only, comma-separated list", 0 }, + { "tid", 't', "TID", 0, "Trace these TIDs only, comma-separated list", 0 }, { "user-threads-only", 'u', NULL, 0, "User threads only (no kernel threads)", 0 }, { "kernel-threads-only", 'k', NULL, 0, @@ -82,9 +80,31 @@ static const struct argp_option opts[] = { {}, }; +static int split_pidstr(char *s, char* delim, int max_split, pid_t *pids) +{ + char *pid; + int nr = 0; + + errno = 0; + pid = strtok(s, delim); + while (pid) { + if (nr >= max_split) + return -ENOBUFS; + + pids[nr++] = strtol(pid, NULL, 10); + if (errno) + return -errno; + + pid = strtok(NULL, delim); + } + + return 0; +} + static error_t parse_arg(int key, char *arg, struct argp_state *state) { static int pos_args; + int ret; switch (key) { case 'h': @@ -94,18 +114,26 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.verbose = true; break; case 'p': - errno = 0; - env.pid = strtol(arg, NULL, 10); - if (errno) { - fprintf(stderr, "invalid PID: %s\n", arg); + ret = split_pidstr(strdup(arg), ",", MAX_PID_NR, env.pids); + if (ret) { + if (ret == -ENOBUFS) + fprintf(stderr, "the number of pid is too big, please " + "increase MAX_PID_NR's value and recompile\n"); + else + fprintf(stderr, "invalid PID: %s\n", arg); + argp_usage(state); } break; case 't': - errno = 0; - env.tid = strtol(arg, NULL, 10); - if (errno || env.tid <= 0) { - fprintf(stderr, "Invalid TID: %s\n", arg); + ret = split_pidstr(strdup(arg), ",", MAX_TID_NR, env.tids); + if (ret) { + if (ret == -ENOBUFS) + fprintf(stderr, "the number of tid is too big, please " + "increase MAX_TID_NR's value and recompile\n"); + else + fprintf(stderr, "invalid TID: %s\n", arg); + argp_usage(state); } break; @@ -281,6 +309,41 @@ static void print_map(struct ksyms *ksyms, struct syms_cache *syms_cache, free(ip); } +static bool print_header_threads() +{ + int i; + bool printed = false; + + if (env.pids[0]) { + printf(" PID ["); + for (i = 0; i < MAX_PID_NR && env.pids[i]; i++) + printf("%d%s", env.pids[i], (i < MAX_PID_NR - 1 && env.pids[i + 1]) ? ", " : "]"); + printed = true; + } + + if (env.tids[0]) { + printf(" TID ["); + for (i = 0; i < MAX_TID_NR && env.tids[i]; i++) + printf("%d%s", env.tids[i], (i < MAX_TID_NR - 1 && env.tids[i + 1]) ? ", " : "]"); + printed = true; + } + + return printed; +} + +static void print_headers() +{ + printf("Tracing off-CPU time (us) of"); + + if (!print_header_threads()) + printf(" all threads"); + + if (env.duration < 99999999) + printf(" for %d secs.\n", env.duration); + else + printf("... Hit Ctrl-C to end.\n"); +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -291,7 +354,9 @@ int main(int argc, char **argv) struct syms_cache *syms_cache = NULL; struct ksyms *ksyms = NULL; struct offcputime_bpf *obj; - int err; + int pids_fd, tids_fd; + int err, i; + __u8 val = 0; err = argp_parse(&argp, argc, argv, 0, NULL, NULL); if (err) @@ -314,14 +379,18 @@ int main(int argc, char **argv) } /* initialize global data (filtering options) */ - obj->rodata->targ_tgid = env.pid; - obj->rodata->targ_pid = env.tid; obj->rodata->user_threads_only = env.user_threads_only; obj->rodata->kernel_threads_only = env.kernel_threads_only; obj->rodata->state = env.state; obj->rodata->min_block_ns = env.min_block_time; obj->rodata->max_block_ns = env.max_block_time; + /* User space PID and TID correspond to TGID and PID in the kernel, respectively */ + if (env.pids[0]) + obj->rodata->filter_by_tgid = true; + if (env.tids[0]) + obj->rodata->filter_by_pid = true; + bpf_map__set_value_size(obj->maps.stackmap, env.perf_max_stack_depth * sizeof(unsigned long)); bpf_map__set_max_entries(obj->maps.stackmap, env.stack_storage_size); @@ -331,6 +400,28 @@ int main(int argc, char **argv) fprintf(stderr, "failed to load BPF programs\n"); goto cleanup; } + + if (env.pids[0]) { + /* User pids_fd points to the tgids map in the BPF program */ + pids_fd = bpf_map__fd(obj->maps.tgids); + for (i = 0; i < MAX_PID_NR && env.pids[i]; i++) { + if (bpf_map_update_elem(pids_fd, &(env.pids[i]), &val, BPF_ANY) != 0) { + fprintf(stderr, "failed to init pids map: %s\n", strerror(errno)); + goto cleanup; + } + } + } + if (env.tids[0]) { + /* User tids_fd points to the pids map in the BPF program */ + tids_fd = bpf_map__fd(obj->maps.pids); + for (i = 0; i < MAX_TID_NR && env.tids[i]; i++) { + if (bpf_map_update_elem(tids_fd, &(env.tids[i]), &val, BPF_ANY) != 0) { + fprintf(stderr, "failed to init tids map: %s\n", strerror(errno)); + goto cleanup; + } + } + } + ksyms = ksyms__load(); if (!ksyms) { fprintf(stderr, "failed to load kallsyms\n"); @@ -349,11 +440,8 @@ int main(int argc, char **argv) signal(SIGINT, sig_handler); - printf("Tracing off-CPU time (us)"); - if (env.duration < 99999999) - printf(" for %d secs.\n", env.duration); - else - printf("... Hit Ctrl-C to end.\n"); + print_headers(); + /* * We'll get sleep interrupted when someone presses Ctrl-C (which will * be "handled" with noop by sig_handler). diff --git a/libbpf-tools/offcputime.h b/libbpf-tools/offcputime.h index 43ca3647d1c9..2bcd0d0ee402 100644 --- a/libbpf-tools/offcputime.h +++ b/libbpf-tools/offcputime.h @@ -3,6 +3,8 @@ #define __OFFCPUTIME_H #define TASK_COMM_LEN 16 +#define MAX_PID_NR 30 +#define MAX_TID_NR 30 struct key_t { __u32 pid; From 6478750621908c171ac1c710bf95e79aaa8afd72 Mon Sep 17 00:00:00 2001 From: Eunseon Lee Date: Thu, 26 Sep 2024 20:08:16 +0900 Subject: [PATCH 2/2] libbpf-tools/trace_helpers: Add a utility to split and convert string Add a utility API to split and convert strings for argument parsing. Also, apply the API usage to offcputime and profile to remove duplicates. --- libbpf-tools/offcputime.c | 27 ++++------------------- libbpf-tools/profile.c | 27 ++++------------------- libbpf-tools/trace_helpers.c | 42 ++++++++++++++++++++++++++++++++++++ libbpf-tools/trace_helpers.h | 12 +++++++++++ 4 files changed, 62 insertions(+), 46 deletions(-) diff --git a/libbpf-tools/offcputime.c b/libbpf-tools/offcputime.c index cdb25c8ad42e..4e9497c9f5c3 100644 --- a/libbpf-tools/offcputime.c +++ b/libbpf-tools/offcputime.c @@ -80,27 +80,6 @@ static const struct argp_option opts[] = { {}, }; -static int split_pidstr(char *s, char* delim, int max_split, pid_t *pids) -{ - char *pid; - int nr = 0; - - errno = 0; - pid = strtok(s, delim); - while (pid) { - if (nr >= max_split) - return -ENOBUFS; - - pids[nr++] = strtol(pid, NULL, 10); - if (errno) - return -errno; - - pid = strtok(NULL, delim); - } - - return 0; -} - static error_t parse_arg(int key, char *arg, struct argp_state *state) { static int pos_args; @@ -114,7 +93,8 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.verbose = true; break; case 'p': - ret = split_pidstr(strdup(arg), ",", MAX_PID_NR, env.pids); + ret = split_convert(strdup(arg), ",", env.pids, sizeof(env.pids), + sizeof(pid_t), str_to_int); if (ret) { if (ret == -ENOBUFS) fprintf(stderr, "the number of pid is too big, please " @@ -126,7 +106,8 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } break; case 't': - ret = split_pidstr(strdup(arg), ",", MAX_TID_NR, env.tids); + ret = split_convert(strdup(arg), ",", env.tids, sizeof(env.tids), + sizeof(pid_t), str_to_int); if (ret) { if (ret == -ENOBUFS) fprintf(stderr, "the number of tid is too big, please " diff --git a/libbpf-tools/profile.c b/libbpf-tools/profile.c index 03f0ee46c79f..b83158656098 100644 --- a/libbpf-tools/profile.c +++ b/libbpf-tools/profile.c @@ -132,27 +132,6 @@ struct syms_cache *syms_cache; struct syms *syms; static char syminfo[SYM_INFO_LEN]; -static int split_pidstr(char *s, char* sep, int max_split, pid_t *pids) -{ - char *pid; - int nr = 0; - - errno = 0; - pid = strtok(s, sep); - while (pid) { - if (nr >= max_split) - return -ENOBUFS; - - pids[nr++] = strtol(pid, NULL, 10); - if (errno) - return -errno; - - pid = strtok(NULL, ","); - } - - return 0; -} - static error_t parse_arg(int key, char *arg, struct argp_state *state) { static int pos_args; @@ -166,7 +145,8 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) env.verbose = true; break; case 'p': - ret = split_pidstr(strdup(arg), ",", MAX_PID_NR, env.pids); + ret = split_convert(strdup(arg), ",", env.pids, sizeof(env.pids), + sizeof(pid_t), str_to_int); if (ret) { if (ret == -ENOBUFS) fprintf(stderr, "the number of pid is too big, please " @@ -178,7 +158,8 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } break; case 'L': - ret = split_pidstr(strdup(arg), ",", MAX_TID_NR, env.tids); + ret = split_convert(strdup(arg), ",", env.tids, sizeof(env.tids), + sizeof(pid_t), str_to_int); if (ret) { if (ret == -ENOBUFS) fprintf(stderr, "the number of tid is too big, please " diff --git a/libbpf-tools/trace_helpers.c b/libbpf-tools/trace_helpers.c index 732627a21407..2c40b1a85fa4 100644 --- a/libbpf-tools/trace_helpers.c +++ b/libbpf-tools/trace_helpers.c @@ -1247,3 +1247,45 @@ bool probe_ringbuf() close(map_fd); return true; } + +int split_convert(char *s, const char* delim, void *elems, size_t elems_size, + size_t elem_size, convert_fn_t convert) +{ + char *token; + int ret; + char *pos = (char *)elems; + + if (!s || !delim || !elems) + return -EINVAL; + + token = strtok(s, delim); + while (token) { + if (pos + elem_size > (char*)elems + elems_size) + return -ENOBUFS; + + ret = convert(token, pos); + if (ret) + return -ret; + + pos += elem_size; + token = strtok(NULL, delim); + } + + return 0; +} + +int str_to_int(const char *src, void *dest) +{ + errno = 0; + *(int*)dest = strtol(src, NULL, 10); + + return errno; +} + +int str_to_long(const char *src, void *dest) +{ + errno = 0; + *(long*)dest = strtol(src, NULL, 10); + + return errno; +} diff --git a/libbpf-tools/trace_helpers.h b/libbpf-tools/trace_helpers.h index 651cd107e91a..582f19054f8c 100644 --- a/libbpf-tools/trace_helpers.h +++ b/libbpf-tools/trace_helpers.h @@ -108,4 +108,16 @@ bool module_btf_exists(const char *mod); bool probe_tp_btf(const char *name); bool probe_ringbuf(); +typedef int (*convert_fn_t)(const char *src, void *dest); +int split_convert(char *s, const char* delim, void *elems, size_t elems_size, + size_t elem_size, convert_fn_t convert); +/* + * Implementations of convert_fn_t. + * This can be replaced with a user-defined callback function. + */ +/* converts a string to an integer */ +int str_to_int(const char *src, void *dest); +/* converts a string to a long integer */ +int str_to_long(const char *src, void *dest); + #endif /* __TRACE_HELPERS_H */