| /* SPDX-License-Identifier: GPL-2.0 */ |
| #include <inttypes.h> |
| #include <math.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| |
| #include "dwarf-regs.h" /* for EM_HOST */ |
| #include "syscalltbl.h" |
| #include "util/cgroup.h" |
| #include "util/hashmap.h" |
| #include "util/trace.h" |
| #include "util/util.h" |
| #include <bpf/bpf.h> |
| #include <linux/rbtree.h> |
| #include <linux/time64.h> |
| #include <tools/libc_compat.h> /* reallocarray */ |
| |
| #include "bpf_skel/syscall_summary.h" |
| #include "bpf_skel/syscall_summary.skel.h" |
| |
| |
/* BPF skeleton handle: opened in trace_prepare_bpf_summary(), destroyed in trace_cleanup_bpf_summary() */
static struct syscall_summary_bpf *skel;
/* cgroup tree filled by read_all_cgroups() in cgroup mode; searched by cgroup id when printing */
static struct rb_root cgroups = RB_ROOT;
| |
| int trace_prepare_bpf_summary(enum trace_summary_mode mode) |
| { |
| skel = syscall_summary_bpf__open(); |
| if (skel == NULL) { |
| fprintf(stderr, "failed to open syscall summary bpf skeleton\n"); |
| return -1; |
| } |
| |
| if (mode == SUMMARY__BY_THREAD) |
| skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD; |
| else if (mode == SUMMARY__BY_CGROUP) |
| skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP; |
| else |
| skel->rodata->aggr_mode = SYSCALL_AGGR_CPU; |
| |
| if (cgroup_is_v2("perf_event") > 0) |
| skel->rodata->use_cgroup_v2 = 1; |
| |
| if (syscall_summary_bpf__load(skel) < 0) { |
| fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); |
| return -1; |
| } |
| |
| if (syscall_summary_bpf__attach(skel) < 0) { |
| fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); |
| return -1; |
| } |
| |
| if (mode == SUMMARY__BY_CGROUP) |
| read_all_cgroups(&cgroups); |
| |
| return 0; |
| } |
| |
| void trace_start_bpf_summary(void) |
| { |
| skel->bss->enabled = 1; |
| } |
| |
| void trace_end_bpf_summary(void) |
| { |
| skel->bss->enabled = 0; |
| } |
| |
/* Stats of one syscall, tagged with the syscall number they belong to */
struct syscall_node {
	int syscall_nr;			/* syscall number, turned into a name when printing */
	struct syscall_stats stats;	/* count, errors, total/min/max time and squared sum */
};
| |
| static double rel_stddev(struct syscall_stats *stat) |
| { |
| double variance, average; |
| |
| if (stat->count < 2) |
| return 0; |
| |
| average = (double)stat->total_time / stat->count; |
| |
| variance = stat->squared_sum; |
| variance -= (stat->total_time * stat->total_time) / stat->count; |
| variance /= stat->count - 1; |
| |
| return 100 * sqrt(variance / stat->count) / average; |
| } |
| |
/*
 * The syscall_data is to maintain syscall stats ordered by total time.
 * It supports different summary modes like per-thread or global.
 *
 * For per-thread stats, it uses a two-level data structure -
 * syscall_data is keyed by TID and has an array of nodes which
 * represent each syscall for the thread.
 *
 * For global stats, it's still two-level technically but we don't need
 * per-cpu analysis so it's keyed by the syscall number to combine stats
 * from different CPUs.  And syscall_data always has a single syscall_node
 * so it can effectively work as a flat hierarchy.
 *
 * For per-cgroup stats, it uses a two-level data structure like the
 * per-thread case - syscall_data is keyed by CGROUP and has an array of
 * nodes which represent each syscall for the cgroup.
 */
struct syscall_data {
	u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
	int nr_events;			/* sum of the syscall counts added to this entry */
	int nr_nodes;			/* number of elements in 'nodes' */
	u64 total_time;			/* sum of the total time of all stats added; sort key */
	struct syscall_node *nodes;	/* heap-allocated array of per-syscall stats */
};
| |
| static int datacmp(const void *a, const void *b) |
| { |
| const struct syscall_data * const *sa = a; |
| const struct syscall_data * const *sb = b; |
| |
| return (*sa)->total_time > (*sb)->total_time ? -1 : 1; |
| } |
| |
| static int nodecmp(const void *a, const void *b) |
| { |
| const struct syscall_node *na = a; |
| const struct syscall_node *nb = b; |
| |
| return na->stats.total_time > nb->stats.total_time ? -1 : 1; |
| } |
| |
| static size_t sc_node_hash(long key, void *ctx __maybe_unused) |
| { |
| return key; |
| } |
| |
| static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused) |
| { |
| return key1 == key2; |
| } |
| |
| static int print_common_stats(struct syscall_data *data, FILE *fp) |
| { |
| int printed = 0; |
| |
| for (int i = 0; i < data->nr_nodes; i++) { |
| struct syscall_node *node = &data->nodes[i]; |
| struct syscall_stats *stat = &node->stats; |
| double total = (double)(stat->total_time) / NSEC_PER_MSEC; |
| double min = (double)(stat->min_time) / NSEC_PER_MSEC; |
| double max = (double)(stat->max_time) / NSEC_PER_MSEC; |
| double avg = total / stat->count; |
| const char *name; |
| |
| /* TODO: support other ABIs */ |
| name = syscalltbl__name(EM_HOST, node->syscall_nr); |
| if (name) |
| printed += fprintf(fp, " %-15s", name); |
| else |
| printed += fprintf(fp, " syscall:%-7d", node->syscall_nr); |
| |
| printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", |
| stat->count, stat->error, total, min, avg, max, |
| rel_stddev(stat)); |
| } |
| return printed; |
| } |
| |
| static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key, |
| struct syscall_stats *map_data) |
| { |
| struct syscall_data *data; |
| struct syscall_node *nodes; |
| |
| if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) { |
| data = zalloc(sizeof(*data)); |
| if (data == NULL) |
| return -ENOMEM; |
| |
| data->key = map_key->cpu_or_tid; |
| if (hashmap__add(hash, data->key, data) < 0) { |
| free(data); |
| return -ENOMEM; |
| } |
| } |
| |
| /* update thread total stats */ |
| data->nr_events += map_data->count; |
| data->total_time += map_data->total_time; |
| |
| nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); |
| if (nodes == NULL) |
| return -ENOMEM; |
| |
| data->nodes = nodes; |
| nodes = &data->nodes[data->nr_nodes++]; |
| nodes->syscall_nr = map_key->nr; |
| |
| /* each thread has an entry for each syscall, just use the stat */ |
| memcpy(&nodes->stats, map_data, sizeof(*map_data)); |
| return 0; |
| } |
| |
| static int print_thread_stat(struct syscall_data *data, FILE *fp) |
| { |
| int printed = 0; |
| |
| qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); |
| |
| printed += fprintf(fp, " thread (%d), ", (int)data->key); |
| printed += fprintf(fp, "%d events\n\n", data->nr_events); |
| |
| printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); |
| printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); |
| printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); |
| |
| printed += print_common_stats(data, fp); |
| printed += fprintf(fp, "\n\n"); |
| |
| return printed; |
| } |
| |
/*
 * Print the per-thread summary of each of the @nr_data entries in @data,
 * in array order.  Returns the accumulated print_thread_stat() results.
 */
static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	int len = 0;
	int idx = 0;

	while (idx < nr_data)
		len += print_thread_stat(data[idx++], fp);

	return len;
}
| |
| static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key, |
| struct syscall_stats *map_data) |
| { |
| struct syscall_data *data; |
| struct syscall_stats *stat; |
| |
| if (!hashmap__find(hash, map_key->nr, &data)) { |
| data = zalloc(sizeof(*data)); |
| if (data == NULL) |
| return -ENOMEM; |
| |
| data->nodes = zalloc(sizeof(*data->nodes)); |
| if (data->nodes == NULL) { |
| free(data); |
| return -ENOMEM; |
| } |
| |
| data->nr_nodes = 1; |
| data->key = map_key->nr; |
| data->nodes->syscall_nr = data->key; |
| |
| if (hashmap__add(hash, data->key, data) < 0) { |
| free(data->nodes); |
| free(data); |
| return -ENOMEM; |
| } |
| } |
| |
| /* update total stats for this syscall */ |
| data->nr_events += map_data->count; |
| data->total_time += map_data->total_time; |
| |
| /* This is sum of the same syscall from different CPUs */ |
| stat = &data->nodes->stats; |
| |
| stat->total_time += map_data->total_time; |
| stat->squared_sum += map_data->squared_sum; |
| stat->count += map_data->count; |
| stat->error += map_data->error; |
| |
| if (stat->max_time < map_data->max_time) |
| stat->max_time = map_data->max_time; |
| if (stat->min_time > map_data->min_time || stat->min_time == 0) |
| stat->min_time = map_data->min_time; |
| |
| return 0; |
| } |
| |
| static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp) |
| { |
| int printed = 0; |
| int nr_events = 0; |
| |
| for (int i = 0; i < nr_data; i++) |
| nr_events += data[i]->nr_events; |
| |
| printed += fprintf(fp, " total, %d events\n\n", nr_events); |
| |
| printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); |
| printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); |
| printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); |
| |
| for (int i = 0; i < nr_data; i++) |
| printed += print_common_stats(data[i], fp); |
| |
| printed += fprintf(fp, "\n\n"); |
| return printed; |
| } |
| |
| static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key, |
| struct syscall_stats *map_data) |
| { |
| struct syscall_data *data; |
| struct syscall_node *nodes; |
| |
| if (!hashmap__find(hash, map_key->cgroup, &data)) { |
| data = zalloc(sizeof(*data)); |
| if (data == NULL) |
| return -ENOMEM; |
| |
| data->key = map_key->cgroup; |
| if (hashmap__add(hash, data->key, data) < 0) { |
| free(data); |
| return -ENOMEM; |
| } |
| } |
| |
| /* update thread total stats */ |
| data->nr_events += map_data->count; |
| data->total_time += map_data->total_time; |
| |
| nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); |
| if (nodes == NULL) |
| return -ENOMEM; |
| |
| data->nodes = nodes; |
| nodes = &data->nodes[data->nr_nodes++]; |
| nodes->syscall_nr = map_key->nr; |
| |
| /* each thread has an entry for each syscall, just use the stat */ |
| memcpy(&nodes->stats, map_data, sizeof(*map_data)); |
| return 0; |
| } |
| |
| static int print_cgroup_stat(struct syscall_data *data, FILE *fp) |
| { |
| int printed = 0; |
| struct cgroup *cgrp = __cgroup__find(&cgroups, data->key); |
| |
| qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); |
| |
| if (cgrp) |
| printed += fprintf(fp, " cgroup %s,", cgrp->name); |
| else |
| printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key); |
| |
| printed += fprintf(fp, " %d events\n\n", data->nr_events); |
| |
| printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); |
| printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); |
| printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); |
| |
| printed += print_common_stats(data, fp); |
| printed += fprintf(fp, "\n\n"); |
| |
| return printed; |
| } |
| |
/*
 * Print the per-cgroup summary of each of the @nr_data entries in @data,
 * in array order.  Returns the accumulated print_cgroup_stat() results.
 */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	int len = 0;
	int idx = 0;

	while (idx < nr_data)
		len += print_cgroup_stat(data[idx++], fp);

	return len;
}
| |
| int trace_print_bpf_summary(FILE *fp) |
| { |
| struct bpf_map *map = skel->maps.syscall_stats_map; |
| struct syscall_key *prev_key, key; |
| struct syscall_data **data = NULL; |
| struct hashmap schash; |
| struct hashmap_entry *entry; |
| int nr_data = 0; |
| int printed = 0; |
| int i; |
| size_t bkt; |
| |
| hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL); |
| |
| printed = fprintf(fp, "\n Summary of events:\n\n"); |
| |
| /* get stats from the bpf map */ |
| prev_key = NULL; |
| while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { |
| struct syscall_stats stat; |
| |
| if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { |
| switch (skel->rodata->aggr_mode) { |
| case SYSCALL_AGGR_THREAD: |
| update_thread_stats(&schash, &key, &stat); |
| break; |
| case SYSCALL_AGGR_CPU: |
| update_total_stats(&schash, &key, &stat); |
| break; |
| case SYSCALL_AGGR_CGROUP: |
| update_cgroup_stats(&schash, &key, &stat); |
| break; |
| default: |
| break; |
| } |
| } |
| |
| prev_key = &key; |
| } |
| |
| nr_data = hashmap__size(&schash); |
| data = calloc(nr_data, sizeof(*data)); |
| if (data == NULL) |
| goto out; |
| |
| i = 0; |
| hashmap__for_each_entry(&schash, entry, bkt) |
| data[i++] = entry->pvalue; |
| |
| qsort(data, nr_data, sizeof(*data), datacmp); |
| |
| switch (skel->rodata->aggr_mode) { |
| case SYSCALL_AGGR_THREAD: |
| printed += print_thread_stats(data, nr_data, fp); |
| break; |
| case SYSCALL_AGGR_CPU: |
| printed += print_total_stats(data, nr_data, fp); |
| break; |
| case SYSCALL_AGGR_CGROUP: |
| printed += print_cgroup_stats(data, nr_data, fp); |
| break; |
| default: |
| break; |
| } |
| |
| for (i = 0; i < nr_data && data; i++) { |
| free(data[i]->nodes); |
| free(data[i]); |
| } |
| free(data); |
| |
| out: |
| hashmap__clear(&schash); |
| return printed; |
| } |
| |
| void trace_cleanup_bpf_summary(void) |
| { |
| if (!RB_EMPTY_ROOT(&cgroups)) { |
| struct cgroup *cgrp, *tmp; |
| |
| rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node) |
| cgroup__put(cgrp); |
| |
| cgroups = RB_ROOT; |
| } |
| |
| syscall_summary_bpf__destroy(skel); |
| } |