dwarves/pfunct.c

790 lines
19 KiB
C
Raw Normal View History

/*
SPDX-License-Identifier: GPL-2.0-only
Copyright (C) 2006 Mandriva Conectiva S.A.
Copyright (C) 2006 Arnaldo Carvalho de Melo <acme@mandriva.com>
Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
*/
#include <argp.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "dwarves.h"
#include "dwarves_emit.h"
#include "dutil.h"
#include "elf_symtab.h"
/*
 * Non-zero selects verbose output: fn_stats_fmtr() and cu_class_iterator()
 * print the full tag instead of just the function name, and
 * function__filter() reports mismatching duplicate definitions.
 */
static int verbose;
[CLASSES]: Add support for DW_TAG_inlined_subroutine Output of pfunct using this information (all for a make allyesconfig build): Top 5 functions by size of inlined functions in net/ipv4: [acme@newtoy guinea_pig-2.6]$ pfunct -I net/ipv4/built-in.o | sort -k3 -nr | head -5 ip_route_input: 19 7086 tcp_ack: 33 6415 do_ip_vs_set_ctl: 23 4193 q931_help: 8 3822 ip_defrag: 19 3318 [acme@newtoy guinea_pig-2.6]$ And by number of inline expansions: [acme@newtoy guinea_pig-2.6]$ pfunct -I net/ipv4/built-in.o | sort -k2 -nr | head -5 dump_packet: 35 905 tcp_v4_rcv: 34 1773 tcp_recvmsg: 34 928 tcp_ack: 33 6415 tcp_rcv_established: 31 1195 [acme@newtoy guinea_pig-2.6]$ And the list of expansions on a specific function: [acme@newtoy guinea_pig-2.6]$ pfunct -i net/ipv4/built-in.o tcp_v4_rcv /* net/ipv4/tcp_ipv4.c:1054 */ int tcp_v4_rcv(struct sk_buff * skb); /* size: 2189, variables: 8, goto labels: 6, inline expansions: 34 (1773 bytes) */ /* inline expansions in tcp_v4_rcv: current_thread_info: 8 pskb_may_pull: 36 pskb_may_pull: 29 tcp_v4_checksum_init: 139 __fswab32: 2 __fswab32: 2 inet_iif: 12 __inet_lookup: 292 __fswab16: 20 inet_ehashfn: 25 inet_ehash_bucket: 18 prefetch: 4 prefetch: 4 prefetch: 4 sock_hold: 4 xfrm4_policy_check: 59 nf_reset: 66 sk_filter: 135 __skb_trim: 20 get_softnet_dma: 68 tcp_prequeue: 257 sk_add_backlog: 40 sock_put: 27 xfrm4_policy_check: 46 tcp_checksum_complete: 29 current_thread_info: 8 sock_put: 20 xfrm4_policy_check: 50 tcp_checksum_complete: 29 current_thread_info: 8 inet_iif: 9 inet_lookup_listener: 36 inet_twsk_put: 114 tcp_v4_timewait_ack: 153 */ [acme@newtoy guinea_pig-2.6]$ Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
2006-11-03 16:41:19 +01:00
/* With show_variables: either one makes fn_stats_fmtr() call function__fprintf_stats() */
static int show_inline_expansions;
static int show_variables;
/* function__filter() drops functions whose 'external' flag is not set */
static int show_externals;
/* function__filter() keeps only functions with inlined == DW_INL_inlined */
static int show_cc_inlined;
/* function__filter() keeps only functions with inlined == DW_INL_declared_not_inlined */
static int show_cc_uninlined;
/* NOTE(review): not used in the visible part of the file - presumably a symbol table name; confirm */
static char *symtab_name;
/* fn_stats_fmtr() prints the tag and stops, skipping stats and the definitions count */
static bool show_prototypes;
/* NOTE(review): not used in the visible part of the file - confirm purpose against the option parser */
static bool expand_types;
/* NOTE(review): not used in the visible part of the file - confirm purpose against the option parser */
static bool compilable_output;
/* Passed to type__emit_definitions() by function__emit_type_definitions() */
static struct type_emissions emissions;
/* NOTE(review): not used in the visible part of the file - presumably an address to look up; confirm */
static uint64_t addr;
pfunct: Use load stealer to speed up --class We were loading everything to then iterate looking for functions with pointers to the --class argument, do it in the stealer and go on ditching the already processed data, greatly speeding up the process. $ pfunct -c perf_event_attr security_perf_event_open register_user_hw_breakpoint modify_user_hw_breakpoint perf_event_create_kernel_counter register_wide_hw_breakpoint bpf_lsm_perf_event_open modify_user_hw_breakpoint_check perf_event_create_kernel_counter $ $ $ pfunct bpf_lsm_perf_event_open int bpf_lsm_perf_event_open(struct perf_event_attr * attr, int type); $ $ for function in `pfunct -c perf_event_attr` ; do pfunct $function ; done int security_perf_event_open(struct perf_event_attr * attr, int type); struct perf_event * register_user_hw_breakpoint(struct perf_event_attr * attr, perf_overflow_handler_t triggered, void * context, struct task_struct * tsk); int modify_user_hw_breakpoint(struct perf_event * bp, struct perf_event_attr * attr); struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr * attr, int cpu, struct task_struct * task, perf_overflow_handler_t callback, void * context); struct perf_event * * register_wide_hw_breakpoint(struct perf_event_attr * attr, perf_overflow_handler_t triggered, void * context); int bpf_lsm_perf_event_open(struct perf_event_attr * attr, int type); int modify_user_hw_breakpoint_check(struct perf_event * bp, struct perf_event_attr * attr, bool check); struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr * attr, int cpu, struct task_struct * task, perf_overflow_handler_t callback, void * context); $ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-11-16 12:15:23 +01:00
/* NOTE(review): not used in the visible part of the file - presumably the --class argument; confirm */
static char *class_name;
pfunct: Use a load stealer to stop as soon as a function is found When --function/-f or just using the function name as the sole arg, greatly speeding up the process. Example using /sys/kernel/btf/vmlinux: Before: $ perf stat -r5 pfunct tcp_v4_rcv int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); Performance counter stats for 'pfunct tcp_v4_rcv' (5 runs): 13,199.77 msec task-clock:u # 1.000 CPUs utilized ( +- 0.27% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 9,426 page-faults:u # 0.714 K/sec ( +- 0.02% ) 57,793,399,298 cycles:u # 4.378 GHz ( +- 0.29% ) (83.33%) 305,498,117 stalled-cycles-frontend:u # 0.53% frontend cycles idle ( +- 2.87% ) (83.33%) 15,537,903,799 stalled-cycles-backend:u # 26.89% backend cycles idle ( +- 4.04% ) (83.33%) 126,344,414,608 instructions:u # 2.19 insn per cycle # 0.12 stalled cycles per insn ( +- 0.00% ) (83.33%) 26,880,839,847 branches:u # 2036.463 M/sec ( +- 0.01% ) (83.34%) 122,011,679 branch-misses:u # 0.45% of all branches ( +- 0.13% ) (83.33%) 13.2005 +- 0.0355 seconds time elapsed ( +- 0.27% ) $ After: $ perf stat -r5 pfunct tcp_v4_rcv int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); Performance counter stats for 'pfunct tcp_v4_rcv' (5 runs): 41.89 msec task-clock:u # 0.993 CPUs utilized ( +- 11.34% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 9,424 page-faults:u # 0.225 M/sec ( +- 0.01% ) 117,923,321 cycles:u # 2.815 GHz ( +- 2.13% ) (82.15%) 1,014,685 stalled-cycles-frontend:u # 0.86% frontend cycles idle ( +- 5.81% ) (83.48%) 37,728,636 stalled-cycles-backend:u # 31.99% backend cycles idle ( +- 4.77% ) (83.80%) 215,262,313 instructions:u # 1.83 insn per cycle # 0.18 stalled cycles per insn ( +- 0.96% 
) (83.77%) 36,786,262 branches:u # 878.162 M/sec ( +- 0.46% ) (83.80%) 338,322 branch-misses:u # 0.92% of all branches ( +- 2.10% ) (83.01%) 0.04220 +- 0.00478 seconds time elapsed ( +- 11.33% ) $ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-11-10 19:15:15 +01:00
/* NOTE(review): not used in the visible part of the file - presumably the function being searched for; confirm */
static char *function_name;
/* Formatting configuration handed to tag__fprintf() and function__fprintf_stats() */
static struct conf_fprintf conf;
/* Loader configuration; its conf_fprintf member points at the 'conf' above */
static struct conf_load conf_load = {
.conf_fprintf = &conf,
};
/*
 * Per-function record kept on fn_stats__list, created by fn_stats__new().
 */
struct fn_stats {
/* linkage into fn_stats__list */
struct list_head node;
/* tag of the function this record describes */
struct tag *tag;
/* compilation unit the tag was taken from */
const struct cu *cu;
/* seeded from the function's cu_total_nr_inline_expansions, accumulated by function__filter() */
uint32_t nr_expansions;
/* seeded from the function's cu_total_size_inline_expansions, accumulated by function__filter() */
uint32_t size_expansions;
/* starts at 1 and is incremented by function__filter() for each further definition found */
uint32_t nr_files;
};
/*
 * fn_stats__new - allocate a statistics record for a function tag
 * @tag: tag of the function, stored in the record
 * @cu: compilation unit the tag belongs to, stored in the record
 *
 * The expansion counters are seeded from the function's per-CU totals and
 * nr_files starts at 1. The 'node' member is left untouched here; it is
 * linked by fn_stats__add().
 *
 * Returns the new record, or NULL when malloc() fails.
 */
static struct fn_stats *fn_stats__new(struct tag *tag, const struct cu *cu)
{
	struct fn_stats *fns = malloc(sizeof(*fns));
	const struct function *func;

	if (fns == NULL)
		return NULL;

	func = tag__function(tag);

	fns->tag	     = tag;
	fns->cu		     = cu;
	fns->nr_files	     = 1;
	fns->nr_expansions   = func->cu_total_nr_inline_expansions;
	fns->size_expansions = func->cu_total_size_inline_expansions;

	return fns;
}
/*
 * fn_stats__delete - release a record obtained from fn_stats__new()
 * @stats: record to free
 *
 * Only the record itself is freed; the tag and cu it points to are left alone.
 */
static void fn_stats__delete(struct fn_stats *stats)
{
free(stats);
}
/* List of struct fn_stats records, linked through their 'node' member */
static LIST_HEAD(fn_stats__list);
static struct fn_stats *fn_stats__find(const char *name)
{
struct fn_stats *pos;
list_for_each_entry(pos, &fn_stats__list, node)
if (strcmp(function__name(tag__function(pos->tag)), name) == 0)
return pos;
return NULL;
}
/*
 * fn_stats__delete_list - unlink and free every record on fn_stats__list
 *
 * Uses the _safe iterator because each entry is removed and freed while
 * the list is being walked.
 */
static void fn_stats__delete_list(void)
{
	struct fn_stats *entry;
	struct fn_stats *next;

	list_for_each_entry_safe(entry, next, &fn_stats__list, node) {
		list_del_init(&entry->node);
		fn_stats__delete(entry);
	}
}
/*
 * fn_stats__add - create a record for @tag/@cu and put it on fn_stats__list
 * @tag: function tag to record
 * @cu: compilation unit the tag belongs to
 *
 * If the record cannot be allocated nothing is added and no error is
 * reported to the caller.
 */
static void fn_stats__add(struct tag *tag, const struct cu *cu)
{
	struct fn_stats *record = fn_stats__new(tag, cu);

	if (record == NULL)
		return;

	list_add(&record->node, &fn_stats__list);
}
/*
 * fn_stats_inline_exps_fmtr - print "name: <nr expansions> <size of expansions>"
 * @stats: record whose function is reported
 *
 * Functions without inline expansions in their lexical block print nothing.
 */
static void fn_stats_inline_exps_fmtr(const struct fn_stats *stats)
{
	struct function *func = tag__function(stats->tag);

	if (func->lexblock.nr_inline_expansions == 0)
		return;

	/*
	 * NOTE(review): the size is printed with %d; if size_inline_expansions
	 * is an unsigned type this should be %u - confirm against its declaration.
	 */
	printf("%s: %u %d\n",
	       function__name(func),
	       func->lexblock.nr_inline_expansions,
	       func->lexblock.size_inline_expansions);
}
/*
 * fn_stats_labels_fmtr - print "name: <number of labels>"
 * @stats: record whose function is reported
 *
 * Functions whose lexical block has no labels print nothing.
 */
static void fn_stats_labels_fmtr(const struct fn_stats *stats)
{
	struct function *func = tag__function(stats->tag);

	if (func->lexblock.nr_labels == 0)
		return;

	printf("%s: %u\n", function__name(func), func->lexblock.nr_labels);
}
/*
 * fn_stats_variables_fmtr - print "name: <number of variables>"
 * @stats: record whose function is reported
 *
 * Functions whose lexical block has no variables print nothing.
 */
static void fn_stats_variables_fmtr(const struct fn_stats *stats)
{
	struct function *func = tag__function(stats->tag);

	if (func->lexblock.nr_variables == 0)
		return;

	printf("%s: %u\n", function__name(func), func->lexblock.nr_variables);
}
/*
 * fn_stats_nr_parms_fmtr - print "name: <number of parameters>"
 * @stats: record whose function is reported
 *
 * Always prints a line, including for functions with zero parameters.
 */
static void fn_stats_nr_parms_fmtr(const struct fn_stats *stats)
{
	struct function *func = tag__function(stats->tag);
	const char *name = function__name(func);

	printf("%s: %u\n", name, func->proto.nr_parms);
}
/*
 * fn_stats_name_len_fmtr - print "name: <length of the name>"
 * @stats: record whose function is reported
 *
 * strlen() returns a size_t, so the length is printed with %zu rather than
 * the signed %zd conversion.
 */
static void fn_stats_name_len_fmtr(const struct fn_stats *stats)
{
	struct function *fn = tag__function(stats->tag);
	const char *name = function__name(fn);

	printf("%s: %zu\n", name, strlen(name));
}
/*
 * fn_stats_size_fmtr - print "name: <function size>"
 * @stats: record whose function is reported
 *
 * Functions for which function__size() yields 0 print nothing. The size is
 * held in a size_t, so it is printed with %zu rather than the signed %zd.
 */
static void fn_stats_size_fmtr(const struct fn_stats *stats)
{
	struct function *fn = tag__function(stats->tag);
	const size_t size = function__size(fn);

	if (size != 0)
		printf("%s: %zu\n", function__name(fn), size);
}
/*
 * fn_stats_fmtr - default formatter for a function record
 * @stats: record to print
 *
 * With neither 'verbose' nor 'show_prototypes' set only the function name
 * is printed. Otherwise the tag is printed via tag__fprintf() followed by a
 * newline; 'show_prototypes' stops there, while plain verbose mode goes on
 * to print the optional per-function stats and the number of definitions.
 */
static void fn_stats_fmtr(const struct fn_stats *stats)
{
	if (!verbose && !show_prototypes) {
		struct function *func = tag__function(stats->tag);

		puts(function__name(func));
		return;
	}

	tag__fprintf(stats->tag, stats->cu, &conf, stdout);
	putchar('\n');

	if (show_prototypes)
		return;

	if (show_variables || show_inline_expansions)
		function__fprintf_stats(stats->tag, stats->cu, &conf, stdout);

	printf("/* definitions: %u */\n", stats->nr_files);
	putchar('\n');
}
static void print_fn_stats(void (*formatter)(const struct fn_stats *f))
{
struct fn_stats *pos;
list_for_each_entry(pos, &fn_stats__list, node)
formatter(pos);
}
/*
 * fn_stats_inline_stats_fmtr - print one row of the total inline stats table
 * @stats: record to report
 *
 * Columns: name, total size of expansions, number of expansions, average
 * size per expansion and number of definitions. Records with at most one
 * expansion are skipped, which also keeps the division below safe.
 */
static void fn_stats_inline_stats_fmtr(const struct fn_stats *stats)
{
	uint32_t avg_size;

	if (stats->nr_expansions <= 1)
		return;

	avg_size = stats->size_expansions / stats->nr_expansions;

	printf("%-31.31s %6u %7u %6u %6u\n",
	       function__name(tag__function(stats->tag)),
	       stats->size_expansions,
	       stats->nr_expansions,
	       avg_size,
	       stats->nr_files);
}
/*
 * print_total_inline_stats - print the inline expansion table
 *
 * Emits a header line and then one row per record on fn_stats__list via
 * fn_stats_inline_stats_fmtr().
 */
static void print_total_inline_stats(void)
{
/* header: name, total size, number of expansions, average size, number of sources */
printf("%-32.32s %5.5s / %5.5s = %5.5s %s\n",
"name", "totsz", "exp#", "avgsz", "src#");
print_fn_stats(fn_stats_inline_stats_fmtr);
}
/*
 * fn_stats__dupmsg - report one difference between two definitions of a function
 * @func: first definition seen
 * @func_cu: compilation unit of @func
 * @dup: the other definition (unused here)
 * @dup_cu: compilation unit of @dup
 * @hdr: in/out flag; while zero, a header naming the function and both CUs
 *       is printed first. It is set to 1 before returning.
 * @fmt: printf-style format for the difference, followed by its arguments
 */
static void fn_stats__dupmsg(struct function *func,
			     const struct cu *func_cu,
			     struct function *dup __maybe_unused,
			     const struct cu *dup_cu,
			     char *hdr, const char *fmt, ...)
{
	va_list ap;

	/* The header is printed only once per pair of definitions */
	if (*hdr == 0) {
		printf("function: %s\nfirst: %s\ncurrent: %s\n",
		       function__name(func),
		       func_cu->name,
		       dup_cu->name);
	}

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);

	*hdr = 1;
}
/*
 * fn_stats__chkdupdef - compare two definitions of the same function
 * @func: first definition seen
 * @func_cu: compilation unit of @func
 * @dup: definition being compared against @func
 * @dup_cu: compilation unit of @dup
 *
 * Reports, via fn_stats__dupmsg(), a difference in function size and/or in
 * the number of parameters. If anything was reported a blank line is
 * printed afterwards to separate it from the next report.
 *
 * The sizes are size_t values, so they are printed with %zu rather than
 * the signed %zd conversion.
 */
static void fn_stats__chkdupdef(struct function *func,
				const struct cu *func_cu,
				struct function *dup,
				const struct cu *dup_cu)
{
	/* Set by fn_stats__dupmsg() once the header has been printed */
	char hdr = 0;
	const size_t func_size = function__size(func);
	const size_t dup_size = function__size(dup);

	if (func_size != dup_size)
		fn_stats__dupmsg(func, func_cu, dup, dup_cu,
				 &hdr, "size: %zu != %zu\n",
				 func_size, dup_size);

	if (func->proto.nr_parms != dup->proto.nr_parms)
		fn_stats__dupmsg(func, func_cu, dup, dup_cu,
				 &hdr, "nr_parms: %u != %u\n",
				 func->proto.nr_parms, dup->proto.nr_parms);

	/* XXX put more checks here: member types, member ordering, etc */

	if (hdr)
		putchar('\n');
}
static bool function__filter(struct function *function, struct cu *cu)
{
struct fn_stats *fstats;
const char *name;
if (!function__tag(function)->top_level)
return true;
dwarves: Remove some more DWARF details from the core Had to be a big sweeping change, but the regression tests shows just improvements :-) Now we stop using an id in struct tag, only storing the type, that now uses 16 bits only, as CTF does. Each format loader has to go on adding the types to the core, that figures out if it is a tag that can be on the tag->type field (tag__is_tag_type). Formats that already have the types separated and in sequence, such as CTF, just ask the core to insert in the types_table directly with its original ID. For DWARF, we ask the core to put it on the table, in sequence, and return the index, that is then stashed with the DWARF specific info (original id, type, decl_line, etc) and hashed by the original id. Later we recode everything, looking up via the original type, getting the small_id to put on the tag->type. The underlying debugging info not needed by the core is stashed in tag->priv, and the DWARF loader now just allocates sizeof(struct dwarf_tag) at the end of the core tag and points it there, and makes that info available thru cu->orig_info. In the future we can ask, when loading a cu, that this info be trown away, so that we reduce the memory footprint for big multi-cu files such as the Linux kernel. There is also a routine to ask for inserting a NULL, as we still have bugs in the CTF decoding and thus some entries are being lost, to avoid using an undefined pointer when traversing the types_table the ctf loader puts a NULL there via cu__table_nullify_type_entry() and then cu__for_each_type skips those. There is some more cleanups for leftovers that I avoided cleaning to reduce this changeset. And also while doing this I saw that enums can appear without any enumerators and that an array with DW_TAG_GNU_vector is actually a different tag, encoded this way till we get to DWARF4 ;-) So now we don't have to lookup on a hash table looking for DWARF offsets, we can do the more sensible thing of just indexing the types_tags array. 
Now to do some cleanups and try to get the per cu encoder done. Then order all the cus per number of type entries, pick the one with more, then go on merging/recoding the types of the others and putting the parent linkage in place. Just to show the extent of the changes: $ codiff /tmp/libdwarves.so.1.0.0 build/libdwarves.so.1.0.0 /home/acme/git/pahole/dwarves.c: struct cu | -4048 struct tag | -32 struct ptr_to_member_type | -32 struct namespace | -32 struct type | -32 struct class | -32 struct base_type | -32 struct array_type | -32 struct class_member | -32 struct lexblock | -32 struct ftype | -32 struct function | -64 struct parameter | -32 struct variable | -32 struct inline_expansion | -32 struct label | -32 struct enumerator | -32 17 structs changed tag__follow_typedef | +3 tag__fprintf_decl_info | +25 array_type__fprintf | +6 type__name | -126 type__find_first_biggest_size_base_type_member | -3 typedef__fprintf | +16 imported_declaration__fprintf | +6 imported_module__fprintf | +3 cu__new | +26 cu__delete | +26 hashtags__hash | -65 hash_64 | -124 hlist_add_head | -78 hashtags__find | -157 cu__hash | -80 cu__add_tag | +20 tag__prefix | -3 cu__find_tag_by_id | -2 cu__find_type_by_id | -3 cu__find_first_typedef_of_type | +38 cu__find_base_type_by_name | +68 cu__find_base_type_by_name_and_size | +72 cu__find_struct_by_name | +59 cus__find_struct_by_name | +8 cus__find_tag_by_id | +5 cus__find_cu_by_name | -6 lexblock__find_tag_by_id | -173 cu__find_variable_by_id | -197 list__find_tag_by_id | -308 cu__find_parameter_by_id | -60 tag__ptr_name | +6 tag__name | +15 variable__type | +13 variable__name | +7 class_member__size | +6 parameter__name | -119 tag__parameter | -14 parameter__type | -143 type__fprintf | -29 union__fprintf | +6 class__add_vtable_entry | -9 type__add_member | -6 type__clone_members | -3 enumeration__add | -6 function__name | -156 ftype__has_parm_of_type | -39 class__find_holes | -27 class__has_hole_ge | -3 type__nr_members_of_type | +3 
lexblock__account_inline_expansions | +3 cu__account_inline_expansions | -18 ftype__fprintf_parms | +46 function__tag_fprintf | +24 lexblock__fprintf | -6 ftype__fprintf | +3 function__fprintf_stats | -18 function__size | -6 class__vtable_fprintf | -11 class__fprintf | -21 tag__fprintf | -35 60 functions changed, 513 bytes added, 2054 bytes removed, diff: -1541 /home/acme/git/pahole/ctf_loader.c: struct ctf_short_type | +0 14 structs changed type__init | -14 type__new | -9 class__new | -12 create_new_base_type | -7 create_new_base_type_float | -7 create_new_array | -8 create_new_subroutine_type | -9 create_full_members | -18 create_short_members | -18 create_new_class | +1 create_new_union | +1 create_new_enumeration | -19 create_new_forward_decl | -2 create_new_typedef | +3 create_new_tag | -5 load_types | +16 class__fixup_ctf_bitfields | -3 17 functions changed, 21 bytes added, 131 bytes removed, diff: -110 /home/acme/git/pahole/dwarf_loader.c: 17 structs changed zalloc | -56 tag__init | +3 array_type__new | +20 type__init | -24 class_member__new | +46 inline_expansion__new | +12 class__new | +81 lexblock__init | +19 function__new | +43 die__create_new_array | +20 die__create_new_parameter | +4 die__create_new_label | +4 die__create_new_subroutine_type | +113 die__create_new_enumeration | -21 die__process_class | +79 die__process_namespace | +76 die__create_new_inline_expansion | +4 die__process_function | +147 __die__process_tag | +34 die__process_unit | +56 die__process | +90 21 functions changed, 851 bytes added, 101 bytes removed, diff: +750 /home/acme/git/pahole/dwarves.c: struct ptr_table | +16 struct cu_orig_info | +32 2 structs changed tag__decl_line | +68 tag__decl_file | +70 tag__orig_id | +71 ptr_table__init | +46 ptr_table__exit | +37 ptr_table__add | +183 ptr_table__add_with_id | +165 ptr_table__entry | +64 cu__table_add_tag | +171 cu__table_nullify_type_entry | +38 10 functions changed, 913 bytes added, diff: +913 /home/acme/git/pahole/ctf_loader.c: 
2 structs changed tag__alloc | +52 1 function changed, 52 bytes added, diff: +52 /home/acme/git/pahole/dwarf_loader.c: struct dwarf_tag | +48 struct dwarf_cu | +4104 4 structs changed dwarf_cu__init | +83 hashtags__hash | +61 hash_64 | +124 hlist_add_head | +78 hashtags__find | +161 cu__hash | +95 tag__is_tag_type | +171 tag__is_type | +85 tag__is_union | +28 tag__is_struct | +57 tag__is_typedef | +28 tag__is_enumeration | +28 dwarf_cu__find_tag_by_id | +56 dwarf_cu__find_type_by_id | +63 tag__alloc | +114 __tag__print_type_not_found | +108 namespace__recode_dwarf_types | +346 tag__namespace | +14 tag__has_namespace | +86 tag__is_namespace | +28 type__recode_dwarf_specification | +182 tag__type | +14 __tag__print_abstract_origin_not_found | +105 ftype__recode_dwarf_types | +322 tag__ftype | +14 tag__parameter | +14 lexblock__recode_dwarf_types | +736 tag__lexblock | +14 tag__label | +14 tag__recode_dwarf_type | +766 tag__ptr_to_member_type | +14 cu__recode_dwarf_types_table | +88 cu__recode_dwarf_types | +48 dwarf_tag__decl_file | +77 strings__ptr | +33 dwarf_tag__decl_line | +59 dwarf_tag__orig_id | +59 dwarf_tag__orig_type | +59 38 functions changed, 4432 bytes added, diff: +4432 build/libdwarves.so.1.0.0: 147 functions changed, 6782 bytes added, 2286 bytes removed, diff: +4496 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-03-06 00:29:35 +01:00
/*
* FIXME: remove this check and try to fix the parameter abstract
* origin code someday...
*/
if (!function->name)
return true;
name = function__name(function);
if (show_externals && !function->external)
return true;
if (show_cc_uninlined &&
function->inlined != DW_INL_declared_not_inlined)
return true;
if (show_cc_inlined && function->inlined != DW_INL_inlined)
return true;
fstats = fn_stats__find(name);
if (fstats != NULL) {
struct function *fn = tag__function(fstats->tag);
if (!fn->external)
return false;
if (verbose)
fn_stats__chkdupdef(fn, fstats->cu, function, cu);
fstats->nr_expansions += function->cu_total_nr_inline_expansions;
fstats->size_expansions += function->cu_total_size_inline_expansions;
fstats->nr_files++;
return true;
}
return false;
}
/*
 * cu_unique_iterator - per-CU callback collecting functions into fn_stats__list
 * @cu: compilation unit to process
 * @cookie: unused
 *
 * Accounts the inline expansions of @cu, then walks its functions and adds
 * a record for each one that function__filter() does not reject (i.e. for
 * which it returns false).
 *
 * Always returns 0.
 */
static int cu_unique_iterator(struct cu *cu, void *cookie __maybe_unused)
{
	struct function *func;
	uint32_t idx;

	cu__account_inline_expansions(cu);

	cu__for_each_function(cu, idx, func) {
		if (function__filter(func, cu))
			continue;

		fn_stats__add(function__tag(func), cu);
	}

	return 0;
}
static int cu_class_iterator(struct cu *cu, void *cookie)
{
type_id_t target_id;
dwarves: Remove some more DWARF details from the core Had to be a big sweeping change, but the regression tests shows just improvements :-) Now we stop using an id in struct tag, only storing the type, that now uses 16 bits only, as CTF does. Each format loader has to go on adding the types to the core, that figures out if it is a tag that can be on the tag->type field (tag__is_tag_type). Formats that already have the types separated and in sequence, such as CTF, just ask the core to insert in the types_table directly with its original ID. For DWARF, we ask the core to put it on the table, in sequence, and return the index, that is then stashed with the DWARF specific info (original id, type, decl_line, etc) and hashed by the original id. Later we recode everything, looking up via the original type, getting the small_id to put on the tag->type. The underlying debugging info not needed by the core is stashed in tag->priv, and the DWARF loader now just allocates sizeof(struct dwarf_tag) at the end of the core tag and points it there, and makes that info available thru cu->orig_info. In the future we can ask, when loading a cu, that this info be trown away, so that we reduce the memory footprint for big multi-cu files such as the Linux kernel. There is also a routine to ask for inserting a NULL, as we still have bugs in the CTF decoding and thus some entries are being lost, to avoid using an undefined pointer when traversing the types_table the ctf loader puts a NULL there via cu__table_nullify_type_entry() and then cu__for_each_type skips those. There is some more cleanups for leftovers that I avoided cleaning to reduce this changeset. And also while doing this I saw that enums can appear without any enumerators and that an array with DW_TAG_GNU_vector is actually a different tag, encoded this way till we get to DWARF4 ;-) So now we don't have to lookup on a hash table looking for DWARF offsets, we can do the more sensible thing of just indexing the types_tags array. 
Now to do some cleanups and try to get the per cu encoder done. Then order all the cus per number of type entries, pick the one with more, then go on merging/recoding the types of the others and putting the parent linkage in place. Just to show the extent of the changes: $ codiff /tmp/libdwarves.so.1.0.0 build/libdwarves.so.1.0.0 /home/acme/git/pahole/dwarves.c: struct cu | -4048 struct tag | -32 struct ptr_to_member_type | -32 struct namespace | -32 struct type | -32 struct class | -32 struct base_type | -32 struct array_type | -32 struct class_member | -32 struct lexblock | -32 struct ftype | -32 struct function | -64 struct parameter | -32 struct variable | -32 struct inline_expansion | -32 struct label | -32 struct enumerator | -32 17 structs changed tag__follow_typedef | +3 tag__fprintf_decl_info | +25 array_type__fprintf | +6 type__name | -126 type__find_first_biggest_size_base_type_member | -3 typedef__fprintf | +16 imported_declaration__fprintf | +6 imported_module__fprintf | +3 cu__new | +26 cu__delete | +26 hashtags__hash | -65 hash_64 | -124 hlist_add_head | -78 hashtags__find | -157 cu__hash | -80 cu__add_tag | +20 tag__prefix | -3 cu__find_tag_by_id | -2 cu__find_type_by_id | -3 cu__find_first_typedef_of_type | +38 cu__find_base_type_by_name | +68 cu__find_base_type_by_name_and_size | +72 cu__find_struct_by_name | +59 cus__find_struct_by_name | +8 cus__find_tag_by_id | +5 cus__find_cu_by_name | -6 lexblock__find_tag_by_id | -173 cu__find_variable_by_id | -197 list__find_tag_by_id | -308 cu__find_parameter_by_id | -60 tag__ptr_name | +6 tag__name | +15 variable__type | +13 variable__name | +7 class_member__size | +6 parameter__name | -119 tag__parameter | -14 parameter__type | -143 type__fprintf | -29 union__fprintf | +6 class__add_vtable_entry | -9 type__add_member | -6 type__clone_members | -3 enumeration__add | -6 function__name | -156 ftype__has_parm_of_type | -39 class__find_holes | -27 class__has_hole_ge | -3 type__nr_members_of_type | +3 
lexblock__account_inline_expansions | +3 cu__account_inline_expansions | -18 ftype__fprintf_parms | +46 function__tag_fprintf | +24 lexblock__fprintf | -6 ftype__fprintf | +3 function__fprintf_stats | -18 function__size | -6 class__vtable_fprintf | -11 class__fprintf | -21 tag__fprintf | -35 60 functions changed, 513 bytes added, 2054 bytes removed, diff: -1541 /home/acme/git/pahole/ctf_loader.c: struct ctf_short_type | +0 14 structs changed type__init | -14 type__new | -9 class__new | -12 create_new_base_type | -7 create_new_base_type_float | -7 create_new_array | -8 create_new_subroutine_type | -9 create_full_members | -18 create_short_members | -18 create_new_class | +1 create_new_union | +1 create_new_enumeration | -19 create_new_forward_decl | -2 create_new_typedef | +3 create_new_tag | -5 load_types | +16 class__fixup_ctf_bitfields | -3 17 functions changed, 21 bytes added, 131 bytes removed, diff: -110 /home/acme/git/pahole/dwarf_loader.c: 17 structs changed zalloc | -56 tag__init | +3 array_type__new | +20 type__init | -24 class_member__new | +46 inline_expansion__new | +12 class__new | +81 lexblock__init | +19 function__new | +43 die__create_new_array | +20 die__create_new_parameter | +4 die__create_new_label | +4 die__create_new_subroutine_type | +113 die__create_new_enumeration | -21 die__process_class | +79 die__process_namespace | +76 die__create_new_inline_expansion | +4 die__process_function | +147 __die__process_tag | +34 die__process_unit | +56 die__process | +90 21 functions changed, 851 bytes added, 101 bytes removed, diff: +750 /home/acme/git/pahole/dwarves.c: struct ptr_table | +16 struct cu_orig_info | +32 2 structs changed tag__decl_line | +68 tag__decl_file | +70 tag__orig_id | +71 ptr_table__init | +46 ptr_table__exit | +37 ptr_table__add | +183 ptr_table__add_with_id | +165 ptr_table__entry | +64 cu__table_add_tag | +171 cu__table_nullify_type_entry | +38 10 functions changed, 913 bytes added, diff: +913 /home/acme/git/pahole/ctf_loader.c: 
2 structs changed tag__alloc | +52 1 function changed, 52 bytes added, diff: +52 /home/acme/git/pahole/dwarf_loader.c: struct dwarf_tag | +48 struct dwarf_cu | +4104 4 structs changed dwarf_cu__init | +83 hashtags__hash | +61 hash_64 | +124 hlist_add_head | +78 hashtags__find | +161 cu__hash | +95 tag__is_tag_type | +171 tag__is_type | +85 tag__is_union | +28 tag__is_struct | +57 tag__is_typedef | +28 tag__is_enumeration | +28 dwarf_cu__find_tag_by_id | +56 dwarf_cu__find_type_by_id | +63 tag__alloc | +114 __tag__print_type_not_found | +108 namespace__recode_dwarf_types | +346 tag__namespace | +14 tag__has_namespace | +86 tag__is_namespace | +28 type__recode_dwarf_specification | +182 tag__type | +14 __tag__print_abstract_origin_not_found | +105 ftype__recode_dwarf_types | +322 tag__ftype | +14 tag__parameter | +14 lexblock__recode_dwarf_types | +736 tag__lexblock | +14 tag__label | +14 tag__recode_dwarf_type | +766 tag__ptr_to_member_type | +14 cu__recode_dwarf_types_table | +88 cu__recode_dwarf_types | +48 dwarf_tag__decl_file | +77 strings__ptr | +33 dwarf_tag__decl_line | +59 dwarf_tag__orig_id | +59 dwarf_tag__orig_type | +59 38 functions changed, 4432 bytes added, diff: +4432 build/libdwarves.so.1.0.0: 147 functions changed, 6782 bytes added, 2286 bytes removed, diff: +4496 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-03-06 00:29:35 +01:00
struct tag *target = cu__find_struct_by_name(cu, cookie, 0, &target_id);
if (target == NULL)
return 0;
struct function *pos;
uint32_t id;
cu__for_each_function(cu, id, pos) {
if (pos->inlined ||
!ftype__has_parm_of_type(&pos->proto, target_id, cu))
continue;
if (verbose)
tag__fprintf(function__tag(pos), cu, &conf, stdout);
else
fputs(function__name(pos), stdout);
putchar('\n');
}
return 0;
}
static int function__emit_type_definitions(struct function *func,
struct cu *cu, FILE *fp)
{
struct parameter *pos;
btf_loader: Add support for BTF_KIND_FUNC Some changes to the fprintf routines were needed, as BTF has as the function type just a BTF_KIND_FUNC_PROTO, while DWARF has as the type for a function its return value type. With a function->btf flag this was overcome and all the other goodies in pfunct are present, for instance: $ pahole -JV examples/tcp.o | grep -w FUNC | head [4068] FUNC tcp_init type_id=4067 [4070] FUNC tcp_abort type_id=4069 [4072] FUNC tcp_done type_id=4071 [4074] FUNC tcp_md5_hash_key type_id=4073 [4076] FUNC tcp_md5_hash_skb_data type_id=4075 [4078] FUNC tcp_get_md5sig_pool type_id=4077 [4080] FUNC tcp_alloc_md5sig_pool type_id=4079 [4082] FUNC compat_tcp_getsockopt type_id=4081 [4084] FUNC tcp_getsockopt type_id=4083 [4086] FUNC tcp_get_timestamping_opt_stats type_id=4085 $ $ pfunct -F btf examples/tcp.o | head memset memcpy tcp_enter_memory_pressure tcp_leave_memory_pressure tcp_init_sock tcp_init_transfer tcp_poll tcp_ioctl tcp_splice_read sk_stream_alloc_skb $ $ pfunct --prototype -F btf examples/tcp.o | head void * memset(void * p, int c, __kernel_size_t size); void * memcpy(void * p, const void * q, __kernel_size_t size); void tcp_enter_memory_pressure(struct sock * sk); void tcp_leave_memory_pressure(struct sock * sk); void tcp_init_sock(struct sock * sk); void tcp_init_transfer(struct sock * sk, int bpf_op); __poll_t tcp_poll(struct file * file, struct socket * sock, poll_table * wait); int tcp_ioctl(struct sock * sk, int cmd, long unsigned int arg); ssize_t tcp_splice_read(struct socket * sock, loff_t * ppos, struct pipe_inode_info * pipe, size_t len, unsigned int flags); struct sk_buff * sk_stream_alloc_skb(struct sock * sk, int size, gfp_t gfp, bool force_schedule); $ Now to ask just for the 'struct sock' 'methods', i.e. 
functions that have as one of its arguments a pointer to the given 'class' name: $ pfunct --class sock -F btf examples/tcp.o | head tcp_abort tcp_done compat_tcp_getsockopt tcp_getsockopt tcp_get_info compat_tcp_setsockopt tcp_setsockopt tcp_disconnect tcp_write_queue_purge tcp_close $ Then ask for the prototypes, which requires -V, should have that fixed: $ pfunct -V --prototypes --class sock -F btf examples/tcp.o | head int tcp_abort(struct sock * sk, int err); void tcp_done(struct sock * sk); int compat_tcp_getsockopt(struct sock * sk, int level, int optname, char * optval, int * optlen); int tcp_getsockopt(struct sock * sk, int level, int optname, char * optval, int * optlen); void tcp_get_info(struct sock * sk, struct tcp_info * info); int compat_tcp_setsockopt(struct sock * sk, int level, int optname, char * optval, unsigned int optlen); int tcp_setsockopt(struct sock * sk, int level, int optname, char * optval, unsigned int optlen); int tcp_disconnect(struct sock * sk, int flags); void tcp_write_queue_purge(struct sock * sk); void tcp_close(struct sock * sk, long int timeout); $ Don't like prototypes with parm names, got you covered: $ pfunct --no_parm_names -V --prototypes --class sock -F btf examples/tcp.o | head int tcp_abort(struct sock *, int); void tcp_done(struct sock *); int compat_tcp_getsockopt(struct sock *, int, int, char *, int *); int tcp_getsockopt(struct sock *, int, int, char *, int *); void tcp_get_info(struct sock *, struct tcp_info *); int compat_tcp_setsockopt(struct sock *, int, int, char *, unsigned int); int tcp_setsockopt(struct sock *, int, int, char *, unsigned int); int tcp_disconnect(struct sock *, int); void tcp_write_queue_purge(struct sock *); void tcp_close(struct sock *, long int); $ Don't like long options and want just one function? 
$ pfunct -f tcp_setsockopt -F btf examples/tcp.o int tcp_setsockopt(struct sock * sk, int level, int optname, char * optval, unsigned int optlen); $ Want to generate compileable code for all of those functions, full with the necessary types, etc? $ pfunct -F btf --compile examples/tcp.o > a.c $ gcc -c -o a.o a.c $ pfunct -F dwarf --prototypes --class sock a.o | head pfunct: a.o: No debugging information found $ gcc -g -c -o a.o a.c $ pfunct -V -F dwarf --prototypes --class sock a.o | head void tcp_enter_memory_pressure(struct sock * sk); void tcp_leave_memory_pressure(struct sock * sk); void tcp_init_sock(struct sock * sk); void tcp_init_transfer(struct sock * sk, int bpf_op); int tcp_ioctl(struct sock * sk, int cmd, long unsigned int arg); struct sk_buff * sk_stream_alloc_skb(struct sock * sk, int size, gfp_t gfp, bool force_schedule); ssize_t do_tcp_sendpages(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendmsg_locked(struct sock * sk, struct msghdr * msg, size_t size); $ Now lets go full circle and encode BTF for this a.o generated from source code generated from the original BTF info in that examples/tcp.o file: $ pahole -JV a.o | tail [465] FUNC_PROTO (anon) return=35 args=(392 hp, 393 skb, 5 header_len) [466] FUNC tcp_md5_hash_skb_data type_id=465 [467] FUNC_PROTO (anon) return=35 args=(392 hp, 394 key) [468] FUNC tcp_md5_hash_key type_id=467 [469] FUNC_PROTO (anon) return=0 args=(49 sk) [470] FUNC tcp_done type_id=469 [471] FUNC_PROTO (anon) return=35 args=(49 sk, 35 err) [472] FUNC tcp_abort type_id=471 [473] FUNC_PROTO (anon) return=0 args=(void) [474] FUNC tcp_init type_id=473 $ $ pfunct -F btf -V --prototypes --class=sock a.o | head void tcp_enter_memory_pressure(struct sock * sk); void tcp_leave_memory_pressure(struct sock * 
sk); void tcp_init_sock(struct sock * sk); void tcp_init_transfer(struct sock * sk, int bpf_op); int tcp_ioctl(struct sock * sk, int cmd, long unsigned int arg); struct sk_buff * sk_stream_alloc_skb(struct sock * sk, int size, gfp_t gfp, bool force_schedule); ssize_t do_tcp_sendpages(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendmsg_locked(struct sock * sk, struct msghdr * msg, size_t size); $ Curious about the code generated by 'pfunct -F btf --compile examples/tcp.o? http://vger.kernel.org/~acme/pahole/pfunct-F-BTF--compile-examples-tcp.o.txt Cc: Alexei Starovoitov <ast@fb.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com> Cc: Andrii Nakryiko <andriin@fb.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Yonghong Song <yhs@fb.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-11-05 15:30:51 +01:00
struct ftype *proto = func->btf ? tag__ftype(cu__type(cu, func->proto.tag.type)) : &func->proto;
struct tag *type = cu__type(cu, proto->tag.type);
retry_return_type:
/* type == NULL means the return is void */
if (type == NULL)
goto do_parameters;
if (tag__is_pointer(type) || tag__is_modifier(type)) {
type = cu__type(cu, type->type);
goto retry_return_type;
}
if (tag__is_type(type) && !tag__type(type)->definition_emitted) {
type__emit_definitions(type, cu, &emissions, fp);
type__emit(type, cu, NULL, NULL, fp);
}
do_parameters:
btf_loader: Add support for BTF_KIND_FUNC Some changes to the fprintf routines were needed, as BTF has as the function type just a BTF_KIND_FUNC_PROTO, while DWARF has as the type for a function its return value type. With a function->btf flag this was overcome and all the other goodies in pfunct are present, for instance: $ pahole -JV examples/tcp.o | grep -w FUNC | head [4068] FUNC tcp_init type_id=4067 [4070] FUNC tcp_abort type_id=4069 [4072] FUNC tcp_done type_id=4071 [4074] FUNC tcp_md5_hash_key type_id=4073 [4076] FUNC tcp_md5_hash_skb_data type_id=4075 [4078] FUNC tcp_get_md5sig_pool type_id=4077 [4080] FUNC tcp_alloc_md5sig_pool type_id=4079 [4082] FUNC compat_tcp_getsockopt type_id=4081 [4084] FUNC tcp_getsockopt type_id=4083 [4086] FUNC tcp_get_timestamping_opt_stats type_id=4085 $ $ pfunct -F btf examples/tcp.o | head memset memcpy tcp_enter_memory_pressure tcp_leave_memory_pressure tcp_init_sock tcp_init_transfer tcp_poll tcp_ioctl tcp_splice_read sk_stream_alloc_skb $ $ pfunct --prototype -F btf examples/tcp.o | head void * memset(void * p, int c, __kernel_size_t size); void * memcpy(void * p, const void * q, __kernel_size_t size); void tcp_enter_memory_pressure(struct sock * sk); void tcp_leave_memory_pressure(struct sock * sk); void tcp_init_sock(struct sock * sk); void tcp_init_transfer(struct sock * sk, int bpf_op); __poll_t tcp_poll(struct file * file, struct socket * sock, poll_table * wait); int tcp_ioctl(struct sock * sk, int cmd, long unsigned int arg); ssize_t tcp_splice_read(struct socket * sock, loff_t * ppos, struct pipe_inode_info * pipe, size_t len, unsigned int flags); struct sk_buff * sk_stream_alloc_skb(struct sock * sk, int size, gfp_t gfp, bool force_schedule); $ Now to ask just for the 'struct sock' 'methods', i.e. 
functions that have as one of its arguments a pointer to the given 'class' name: $ pfunct --class sock -F btf examples/tcp.o | head tcp_abort tcp_done compat_tcp_getsockopt tcp_getsockopt tcp_get_info compat_tcp_setsockopt tcp_setsockopt tcp_disconnect tcp_write_queue_purge tcp_close $ Then ask for the prototypes, which requires -V, should have that fixed: $ pfunct -V --prototypes --class sock -F btf examples/tcp.o | head int tcp_abort(struct sock * sk, int err); void tcp_done(struct sock * sk); int compat_tcp_getsockopt(struct sock * sk, int level, int optname, char * optval, int * optlen); int tcp_getsockopt(struct sock * sk, int level, int optname, char * optval, int * optlen); void tcp_get_info(struct sock * sk, struct tcp_info * info); int compat_tcp_setsockopt(struct sock * sk, int level, int optname, char * optval, unsigned int optlen); int tcp_setsockopt(struct sock * sk, int level, int optname, char * optval, unsigned int optlen); int tcp_disconnect(struct sock * sk, int flags); void tcp_write_queue_purge(struct sock * sk); void tcp_close(struct sock * sk, long int timeout); $ Don't like prototypes with parm names, got you covered: $ pfunct --no_parm_names -V --prototypes --class sock -F btf examples/tcp.o | head int tcp_abort(struct sock *, int); void tcp_done(struct sock *); int compat_tcp_getsockopt(struct sock *, int, int, char *, int *); int tcp_getsockopt(struct sock *, int, int, char *, int *); void tcp_get_info(struct sock *, struct tcp_info *); int compat_tcp_setsockopt(struct sock *, int, int, char *, unsigned int); int tcp_setsockopt(struct sock *, int, int, char *, unsigned int); int tcp_disconnect(struct sock *, int); void tcp_write_queue_purge(struct sock *); void tcp_close(struct sock *, long int); $ Don't like long options and want just one function? 
$ pfunct -f tcp_setsockopt -F btf examples/tcp.o int tcp_setsockopt(struct sock * sk, int level, int optname, char * optval, unsigned int optlen); $ Want to generate compileable code for all of those functions, full with the necessary types, etc? $ pfunct -F btf --compile examples/tcp.o > a.c $ gcc -c -o a.o a.c $ pfunct -F dwarf --prototypes --class sock a.o | head pfunct: a.o: No debugging information found $ gcc -g -c -o a.o a.c $ pfunct -V -F dwarf --prototypes --class sock a.o | head void tcp_enter_memory_pressure(struct sock * sk); void tcp_leave_memory_pressure(struct sock * sk); void tcp_init_sock(struct sock * sk); void tcp_init_transfer(struct sock * sk, int bpf_op); int tcp_ioctl(struct sock * sk, int cmd, long unsigned int arg); struct sk_buff * sk_stream_alloc_skb(struct sock * sk, int size, gfp_t gfp, bool force_schedule); ssize_t do_tcp_sendpages(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendmsg_locked(struct sock * sk, struct msghdr * msg, size_t size); $ Now lets go full circle and encode BTF for this a.o generated from source code generated from the original BTF info in that examples/tcp.o file: $ pahole -JV a.o | tail [465] FUNC_PROTO (anon) return=35 args=(392 hp, 393 skb, 5 header_len) [466] FUNC tcp_md5_hash_skb_data type_id=465 [467] FUNC_PROTO (anon) return=35 args=(392 hp, 394 key) [468] FUNC tcp_md5_hash_key type_id=467 [469] FUNC_PROTO (anon) return=0 args=(49 sk) [470] FUNC tcp_done type_id=469 [471] FUNC_PROTO (anon) return=35 args=(49 sk, 35 err) [472] FUNC tcp_abort type_id=471 [473] FUNC_PROTO (anon) return=0 args=(void) [474] FUNC tcp_init type_id=473 $ $ pfunct -F btf -V --prototypes --class=sock a.o | head void tcp_enter_memory_pressure(struct sock * sk); void tcp_leave_memory_pressure(struct sock * 
sk); void tcp_init_sock(struct sock * sk); void tcp_init_transfer(struct sock * sk, int bpf_op); int tcp_ioctl(struct sock * sk, int cmd, long unsigned int arg); struct sk_buff * sk_stream_alloc_skb(struct sock * sk, int size, gfp_t gfp, bool force_schedule); ssize_t do_tcp_sendpages(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage_locked(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendpage(struct sock * sk, struct page * page, int offset, size_t size, int flags); int tcp_sendmsg_locked(struct sock * sk, struct msghdr * msg, size_t size); $ Curious about the code generated by 'pfunct -F btf --compile examples/tcp.o? http://vger.kernel.org/~acme/pahole/pfunct-F-BTF--compile-examples-tcp.o.txt Cc: Alexei Starovoitov <ast@fb.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com> Cc: Andrii Nakryiko <andriin@fb.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Yonghong Song <yhs@fb.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-11-05 15:30:51 +01:00
ftype__for_each_parameter(proto, pos) {
type = cu__type(cu, pos->tag.type);
try_again:
if (type == NULL)
continue;
if (tag__is_pointer(type) || tag__is_modifier(type)) {
type = cu__type(cu, type->type);
goto try_again;
}
if (type->tag == DW_TAG_subroutine_type) {
ftype__emit_definitions(tag__ftype(type), cu, &emissions, fp);
} else if (tag__is_type(type) && !tag__type(type)->definition_emitted) {
type__emit_definitions(type, cu, &emissions, fp);
if (!tag__is_typedef(type))
type__emit(type, cu, NULL, NULL, fp);
putchar('\n');
}
}
return 0;
}
static void function__show(struct function *func, struct cu *cu)
{
struct tag *tag = function__tag(func);
pfunct: Do not reconstruct external functions I.e. those with DW_AT_external set, to avoid regenerating multiple times things like __compiletime_assert_1504: <5><2fc41>: Abbrev Number: 100 (DW_TAG_subprogram) <2fc42> DW_AT_external : 1 <2fc42> DW_AT_name : (indirect string, offset: 0x1751f): __compiletime_assert_1504 <2fc46> DW_AT_decl_file : 1 <2fc47> DW_AT_decl_line : 1504 <2fc49> DW_AT_decl_column : 2 <2fc4a> DW_AT_prototyped : 1 <2fc4a> DW_AT_declaration : 1 <5><2fc4a>: Abbrev Number: 0 <4><2fc4b>: Abbrev Number: 0 <3><2fc4c>: Abbrev Number: 0 <2><2fc4d>: Abbrev Number: 34 (DW_TAG_lexical_block) <3><2fc4e>: Abbrev Number: 12 (DW_TAG_variable) <2fc4f> DW_AT_name : (indirect string, offset: 0xbcc6): ____ptr <2fc53> DW_AT_decl_file : 1 <2fc54> DW_AT_decl_line : 1504 <2fc56> DW_AT_decl_column : 2 <2fc57> DW_AT_type : <0x6441> <3><2fc5b>: Abbrev Number: 34 (DW_TAG_lexical_block) <4><2fc5c>: Abbrev Number: 12 (DW_TAG_variable) <2fc5d> DW_AT_name : (indirect string, offset: 0xeb74): __mptr <2fc61> DW_AT_decl_file : 1 <2fc62> DW_AT_decl_line : 1504 <2fc64> DW_AT_decl_column : 2 <2fc65> DW_AT_type : <0x5a2> <4><2fc69>: Abbrev Number: 34 (DW_TAG_lexical_block) <5><2fc6a>: Abbrev Number: 100 (DW_TAG_subprogram) <2fc6b> DW_AT_external : 1 <2fc6b> DW_AT_name : (indirect string, offset: 0x1751f): __compiletime_assert_1504 <2fc6f> DW_AT_decl_file : 1 <2fc70> DW_AT_decl_line : 1504 <2fc72> DW_AT_decl_column : 2 <2fc73> DW_AT_prototyped : 1 <2fc73> DW_AT_declaration : 1 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 02:27:41 +02:00
if (func->abstract_origin || func->external)
pfunct: Do not reconstruct inline expansions of functions I.e. those that point back to the inline function via DW_AT_abstract_origin. For instance: <1><34b65>: Abbrev Number: 156 (DW_TAG_subprogram) <34b67> DW_AT_external : 1 <34b67> DW_AT_name : (indirect string, offset: 0x2404): tcp_enter_memory_pressure <34b6b> DW_AT_decl_file : 1 <34b6c> DW_AT_decl_line : 324 <34b6e> DW_AT_decl_column : 6 <34b6f> DW_AT_prototyped : 1 <34b6f> DW_AT_inline : 1 (inlined) <SNIP> <1><37f45>: Abbrev Number: 149 (DW_TAG_subprogram) <37f47> DW_AT_abstract_origin: <0x34b65> <37f4b> DW_AT_low_pc : 0x1000 <37f53> DW_AT_high_pc : 0x48 <37f5b> DW_AT_frame_base : 1 byte block: 9c (DW_OP_call_frame_cfa) <37f5d> DW_AT_GNU_all_call_sites: 1 <37f5d> DW_AT_sibling : <0x38032> Generated by: Compilation Unit @ offset 0x0: Length: 0x3b40b (32-bit) Version: 4 Abbrev Offset: 0x0 Pointer Size: 8 <0><b>: Abbrev Number: 215 (DW_TAG_compile_unit) <d> DW_AT_producer : (indirect string, offset: 0xb0bc): GNU C89 8.2.1 20181215 (Red Hat 8.2.1-6) -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -m64 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -mtune=generic -mno-red-zone -mcmodel=kernel -mindirect-branch=thunk-extern -mindirect-branch-register -mrecord-mcount -mfentry -march=x86-64 -g -O2 -std=gnu90 -p -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE -falign-jumps=1 -falign-loops=1 -fno-asynchronous-unwind-tables -fno-delete-null-pointer-checks -fstack-protector-strong -fno-var-tracking-assignments -fno-strict-overflow -fno-merge-all-constants -fmerge-constants -fstack-check=no -fconserve-stack --param allow-store-data-races=0 <11> DW_AT_language : 1 (ANSI C) <12> DW_AT_name : (indirect string, offset: 0x10daa): /home/acme/git/linux/net/ipv4/tcp.c <16> DW_AT_comp_dir : (indirect string, offset: 0x1d8c5): /home/acme/git/build/v5.0+ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-04-05 02:22:42 +02:00
return;
if (expand_types)
function__emit_type_definitions(func, cu, stdout);
tag__fprintf(tag, cu, &conf, stdout);
if (compilable_output) {
struct tag *type = cu__type(cu, func->proto.tag.type);
fprintf(stdout, "\n{");
if (type != NULL && type->type != 0) { /* NULL == void */
if (tag__is_pointer(type))
fprintf(stdout, "\n\treturn (void *)0;");
else if (tag__is_struct(type))
fprintf(stdout, "\n\treturn *(struct %s *)1;", class__name(tag__class(type)));
else if (tag__is_union(type))
fprintf(stdout, "\n\treturn *(union %s *)1;", type__name(tag__type(type)));
else if (tag__is_typedef(type))
fprintf(stdout, "\n\treturn *(%s *)1;", type__name(tag__type(type)));
else
fprintf(stdout, "\n\treturn 0;");
}
fprintf(stdout, "\n}\n");
}
putchar('\n');
if (show_variables || show_inline_expansions)
function__fprintf_stats(tag, cu, &conf, stdout);
}
/*
 * cu_function_iterator - show the functions of a CU, optionally by name.
 * @cu: compile unit whose functions are walked
 * @cookie: function name to match, or NULL to accept every function
 *
 * Returns 1 right after the first function shown when expand_types is off;
 * returns 0 when the walk reaches the end of the CU.
 */
static int cu_function_iterator(struct cu *cu, void *cookie)
{
	const char *wanted = cookie;
	struct function *fn;
	uint32_t id;

	cu__for_each_function(cu, id, fn) {
		if (wanted == NULL || strcmp(function__name(fn), wanted) == 0) {
			function__show(fn, cu);
			if (!expand_types)
				return 1;
		}
	}

	return 0;
}
/*
 * elf_symtab__show - list the local functions in an ELF symbol table.
 * @filename: path of the ELF file to inspect
 *
 * Loads the symbol table selected by symtab_name and prints, for every
 * symbol accepted by elf_sym__is_local_function(), its index, name, value
 * (in hex) and size. The index column is as wide as the number of decimal
 * digits in the symbol count; the name column is as wide as the longest
 * listed name, capped at 32 characters.
 *
 * Returns 0 on success and -1 on failure. Failures to open the file or to
 * set up libelf are reported on stderr; a NULL return from elf_symtab__new()
 * is passed on without an extra message from this function.
 */
int elf_symtab__show(char *filename)
{
	int fd = open(filename, O_RDONLY), err = -1;

	if (fd < 0) {
		/* Don't fail silently: every other error path here says why */
		fprintf(stderr, "%s: cannot open %s.\n", __func__, filename);
		return -1;
	}

	if (elf_version(EV_CURRENT) == EV_NONE) {
		fprintf(stderr, "%s: cannot set libelf version.\n", __func__);
		goto out_close;
	}

	Elf *elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
	if (elf == NULL) {
		fprintf(stderr, "%s: cannot read %s ELF file.\n",
			__func__, filename);
		goto out_close;
	}

	GElf_Ehdr ehdr;
	if (gelf_getehdr(elf, &ehdr) == NULL) {
		fprintf(stderr, "%s: cannot get elf header.\n", __func__);
		goto out_elf_end;
	}

	struct elf_symtab *symtab = elf_symtab__new(symtab_name, elf);
	if (symtab == NULL)
		goto out_elf_end;

	GElf_Sym sym;
	uint32_t index;
	int longest_name = 0;

	/* First pass: find the width needed by the name column */
	elf_symtab__for_each_symbol(symtab, index, sym) {
		if (!elf_sym__is_local_function(&sym))
			continue;
		int len = strlen(elf_sym__name(&sym, symtab));
		if (len > longest_name)
			longest_name = len;
	}

	if (longest_name > 32)
		longest_name = 32;

	/* Width of the index column: decimal digits in the symbol count */
	int index_spacing = 0;
	int nr = elf_symtab__nr_symbols(symtab);

	while (nr) {
		++index_spacing;
		nr /= 10;
	}

	/* Second pass: print the table; index is unsigned, hence %u */
	elf_symtab__for_each_symbol(symtab, index, sym) {
		if (!elf_sym__is_local_function(&sym))
			continue;
		printf("%*u: %-*s %#llx %5u\n",
		       index_spacing, index, longest_name,
		       elf_sym__name(&sym, symtab),
		       (unsigned long long)elf_sym__value(&sym),
		       elf_sym__size(&sym));
	}

	elf_symtab__delete(symtab);
	err = 0;
out_elf_end:
	elf_end(elf);
out_close:
	close(fd);
	return err;
}
/*
 * elf_symtabs__show - run elf_symtab__show() over a list of files.
 * @filenames: NULL terminated array of file names
 *
 * Stops at the first file for which elf_symtab__show() returns non-zero.
 * Returns EXIT_FAILURE in that case, EXIT_SUCCESS when every file was shown.
 */
int elf_symtabs__show(char *filenames[])
{
	for (char **name = filenames; *name != NULL; ++name) {
		if (elf_symtab__show(*name) != 0)
			return EXIT_FAILURE;
	}

	return EXIT_SUCCESS;
}
/*
 * pfunct_stealer - per-CU callback used while the debugging info is loaded.
 *
 * With a function name selected, looks it up in @cu: when found it is shown
 * and LSK__STOP_LOADING is returned. Otherwise, with a class name selected
 * (and no function name), cu_class_iterator() is run over @cu. Every path
 * other than the "function found" one returns LSK__DELETE.
 */
static enum load_steal_kind pfunct_stealer(struct cu *cu,
					   struct conf_load *conf_load __maybe_unused,
					   void *thr_data __maybe_unused)
{
	if (function_name != NULL) {
		struct tag *found = cu__find_function_by_name(cu, function_name);

		if (found == NULL)
			return LSK__DELETE;

		function__show(tag__function(found), cu);
		return LSK__STOP_LOADING;
	}

	if (class_name != NULL)
		cu_class_iterator(cu, class_name);

	return LSK__DELETE;
}
/*
 * Version reporting: dwarves_print_version is installed through the
 * ARGP_PROGRAM_VERSION_HOOK_DEF macro (presumably a definition of argp's
 * argp_program_version_hook -- confirm against the macro's definition).
 */
ARGP_PROGRAM_VERSION_HOOK_DEF = dwarves_print_version;
/*
 * Keys for the options that only have a long form. The values are outside
 * the printable character range, so argp does not give them a short option
 * letter.
 */
#define ARGP_symtab	   300
#define ARGP_no_parm_names 301
#define ARGP_compile	   302

/*
 * Command line options understood by pfunct, in the form argp expects.
 * Each key is handled by the matching case in pfunct__options_parser().
 * The table ends with an entry whose fields are all zero/NULL.
 */
static const struct argp_option pfunct__options[] = {
	{
		.key  = 'a',
		.name = "addr",
		.arg  = "ADDR",
		.doc  = "show just the function where ADDR is",
	},
	{
		.key  = 'b',
		.name = "expand_types",
		.doc  = "Expand types needed by the prototype",
	},
	{
		.key  = 'c',
		.name = "class",
		.arg  = "CLASS",
		.doc  = "functions that have CLASS pointer parameters",
	},
	{
		.key  = 'E',
		.name = "externals",
		.doc  = "show just external functions",
	},
	{
		.key  = 'f',
		.name = "function",
		.arg  = "FUNCTION",
		.doc  = "show just FUNCTION",
	},
	{
		.name = "format_path",
		.key  = 'F',
		.arg  = "FORMAT_LIST",
		.doc  = "List of debugging formats to try"
	},
	{
		.key  = 'g',
		.name = "goto_labels",
		.doc  = "show number of goto labels",
	},
	{
		.key  = 'G',
		.name = "cc_uninlined",
		.doc  = "declared inline, uninlined by compiler",
	},
	{
		.key  = 'H',
		.name = "cc_inlined",
		.doc  = "not declared inline, inlined by compiler",
	},
	{
		.key  = 'i',
		.name = "inline_expansions",
		.doc  = "show inline expansions",
	},
	{
		.key  = 'I',
		.name = "inline_expansions_stats",
		.doc  = "show inline expansions stats",
	},
	{
		.key  = 'l',
		.name = "decl_info",
		.doc  = "show source code info",
	},
	{
		.key  = 't',
		.name = "total_inline_stats",
		.doc  = "show Multi-CU total inline expansions stats",
	},
	{
		.key  = 's',
		.name = "sizes",
		.doc  = "show size of functions",
	},
	{
		.key  = 'N',
		.name = "function_name_len",
		.doc  = "show size of functions names",
	},
	{
		.key  = 'p',
		.name = "nr_parms",
		.doc  = "show number of parameters",
	},
	{
		.key  = 'P',
		.name = "prototypes",
		.doc  = "show function prototypes",
	},
	{
		.key  = 'S',
		.name = "nr_variables",
		.doc  = "show number of variables",
	},
	{
		.key  = 'T',
		.name = "variables",
		.doc  = "show variables",
	},
	{
		.key  = 'V',
		.name = "verbose",
		.doc  = "be verbose",
	},
	{
		/* The table name is optional: the parser falls back to ".symtab" */
		.name  = "symtab",
		.key   = ARGP_symtab,
		.arg   = "NAME",
		.flags = OPTION_ARG_OPTIONAL,
		.doc   = "show symbol table NAME (Default .symtab)",
	},
	{
		/* The function name is optional: without it nothing is filtered */
		.name  = "compile",
		.key   = ARGP_compile,
		.arg   = "FUNCTION",
		.flags = OPTION_ARG_OPTIONAL,
		.doc   = "Generate compilable source code with types expanded (Default all functions)",
	},
	{
		.name = "no_parm_names",
		.key  = ARGP_no_parm_names,
		.doc  = "Don't show parameter names",
	},
	{
		/* Terminator */
		.name = NULL,
	}
};
/*
 * Routine used to format one fn_stats entry. Starts as fn_stats_fmtr and is
 * replaced by the option parser for -s, -S, -p, -g, -I and -N.
 */
static void (*formatter)(const struct fn_stats *f) = fn_stats_fmtr;
/* Set by the option parser when -t (total_inline_stats) is given. */
static int show_total_inline_expansion_stats;
static error_t pfunct__options_parser(int key, char *arg,
struct argp_state *state)
{
switch (key) {
case ARGP_KEY_INIT:
if (state->child_inputs != NULL)
state->child_inputs[0] = state->input;
break;
dwarves: Allow avoiding loading addr information As, for instance, pahole doesn't need it at all. Down from: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 17233.989563 task-clock-msecs # 0.994 CPUs ( +- 0.076% ) 1880 context-switches # 0.000 M/sec ( +- 0.159% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 26248 page-faults # 0.002 M/sec ( +- 0.000% ) 34244461105 cycles # 1987.030 M/sec ( +- 0.078% ) 34510583834 instructions # 1.008 IPC ( +- 0.001% ) 445937867 cache-references # 25.875 M/sec ( +- 0.160% ) 56898165 cache-misses # 3.302 M/sec ( +- 0.074% ) 17.335292038 seconds time elapsed ( +- 0.076% ) [acme@doppio pahole]$ To: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 16511.627334 task-clock-msecs # 0.992 CPUs ( +- 0.208% ) 1922 context-switches # 0.000 M/sec ( +- 3.068% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 25570 page-faults # 0.002 M/sec ( +- 0.000% ) 32807624343 cycles # 1986.941 M/sec ( +- 0.208% ) 32711598374 instructions # 0.997 IPC ( +- 0.001% ) 436345377 cache-references # 26.427 M/sec ( +- 0.178% ) 54044997 cache-misses # 3.273 M/sec ( +- 0.685% ) 16.652951166 seconds time elapsed ( +- 0.304% ) [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-07-06 18:44:57 +02:00
case 'a': addr = strtoull(arg, NULL, 0);
conf_load.get_addr_info = true; break;
case 'b': expand_types = true;
type_emissions__init(&emissions); break;
case 'c': class_name = arg; break;
case 'f': function_name = arg; break;
case 'F': conf_load.format_path = arg; break;
case 'E': show_externals = 1; break;
dwarves: Allow avoiding loading addr information As, for instance, pahole doesn't need it at all. Down from: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 17233.989563 task-clock-msecs # 0.994 CPUs ( +- 0.076% ) 1880 context-switches # 0.000 M/sec ( +- 0.159% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 26248 page-faults # 0.002 M/sec ( +- 0.000% ) 34244461105 cycles # 1987.030 M/sec ( +- 0.078% ) 34510583834 instructions # 1.008 IPC ( +- 0.001% ) 445937867 cache-references # 25.875 M/sec ( +- 0.160% ) 56898165 cache-misses # 3.302 M/sec ( +- 0.074% ) 17.335292038 seconds time elapsed ( +- 0.076% ) [acme@doppio pahole]$ To: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 16511.627334 task-clock-msecs # 0.992 CPUs ( +- 0.208% ) 1922 context-switches # 0.000 M/sec ( +- 3.068% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 25570 page-faults # 0.002 M/sec ( +- 0.000% ) 32807624343 cycles # 1986.941 M/sec ( +- 0.208% ) 32711598374 instructions # 0.997 IPC ( +- 0.001% ) 436345377 cache-references # 26.427 M/sec ( +- 0.178% ) 54044997 cache-misses # 3.273 M/sec ( +- 0.685% ) 16.652951166 seconds time elapsed ( +- 0.304% ) [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-07-06 18:44:57 +02:00
case 's': formatter = fn_stats_size_fmtr;
conf_load.get_addr_info = true; break;
case 'S': formatter = fn_stats_variables_fmtr; break;
case 'p': formatter = fn_stats_nr_parms_fmtr; break;
case 'P': show_prototypes = true; break;
case 'g': formatter = fn_stats_labels_fmtr; break;
case 'G': show_cc_uninlined = 1; break;
case 'H': show_cc_inlined = 1; break;
case 'i': show_inline_expansions = verbose = 1;
dwarves: Allow avoiding loading addr information As, for instance, pahole doesn't need it at all. Down from: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 17233.989563 task-clock-msecs # 0.994 CPUs ( +- 0.076% ) 1880 context-switches # 0.000 M/sec ( +- 0.159% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 26248 page-faults # 0.002 M/sec ( +- 0.000% ) 34244461105 cycles # 1987.030 M/sec ( +- 0.078% ) 34510583834 instructions # 1.008 IPC ( +- 0.001% ) 445937867 cache-references # 25.875 M/sec ( +- 0.160% ) 56898165 cache-misses # 3.302 M/sec ( +- 0.074% ) 17.335292038 seconds time elapsed ( +- 0.076% ) [acme@doppio pahole]$ To: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 16511.627334 task-clock-msecs # 0.992 CPUs ( +- 0.208% ) 1922 context-switches # 0.000 M/sec ( +- 3.068% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 25570 page-faults # 0.002 M/sec ( +- 0.000% ) 32807624343 cycles # 1986.941 M/sec ( +- 0.208% ) 32711598374 instructions # 0.997 IPC ( +- 0.001% ) 436345377 cache-references # 26.427 M/sec ( +- 0.178% ) 54044997 cache-misses # 3.273 M/sec ( +- 0.685% ) 16.652951166 seconds time elapsed ( +- 0.304% ) [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-07-06 18:44:57 +02:00
conf_load.extra_dbg_info = true;
conf_load.get_addr_info = true; break;
case 'I': formatter = fn_stats_inline_exps_fmtr;
conf_load.get_addr_info = true; break;
case 'l': conf.show_decl_info = 1;
conf_load.extra_dbg_info = 1; break;
dwarves: Allow avoiding loading addr information As, for instance, pahole doesn't need it at all. Down from: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 17233.989563 task-clock-msecs # 0.994 CPUs ( +- 0.076% ) 1880 context-switches # 0.000 M/sec ( +- 0.159% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 26248 page-faults # 0.002 M/sec ( +- 0.000% ) 34244461105 cycles # 1987.030 M/sec ( +- 0.078% ) 34510583834 instructions # 1.008 IPC ( +- 0.001% ) 445937867 cache-references # 25.875 M/sec ( +- 0.160% ) 56898165 cache-misses # 3.302 M/sec ( +- 0.074% ) 17.335292038 seconds time elapsed ( +- 0.076% ) [acme@doppio pahole]$ To: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 16511.627334 task-clock-msecs # 0.992 CPUs ( +- 0.208% ) 1922 context-switches # 0.000 M/sec ( +- 3.068% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 25570 page-faults # 0.002 M/sec ( +- 0.000% ) 32807624343 cycles # 1986.941 M/sec ( +- 0.208% ) 32711598374 instructions # 0.997 IPC ( +- 0.001% ) 436345377 cache-references # 26.427 M/sec ( +- 0.178% ) 54044997 cache-misses # 3.273 M/sec ( +- 0.685% ) 16.652951166 seconds time elapsed ( +- 0.304% ) [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-07-06 18:44:57 +02:00
case 't': show_total_inline_expansion_stats = true;
conf_load.get_addr_info = true; break;
case 'T': show_variables = 1; break;
case 'N': formatter = fn_stats_name_len_fmtr; break;
case 'V': verbose = 1;
dwarves: Allow avoiding loading addr information As, for instance, pahole doesn't need it at all. Down from: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 17233.989563 task-clock-msecs # 0.994 CPUs ( +- 0.076% ) 1880 context-switches # 0.000 M/sec ( +- 0.159% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 26248 page-faults # 0.002 M/sec ( +- 0.000% ) 34244461105 cycles # 1987.030 M/sec ( +- 0.078% ) 34510583834 instructions # 1.008 IPC ( +- 0.001% ) 445937867 cache-references # 25.875 M/sec ( +- 0.160% ) 56898165 cache-misses # 3.302 M/sec ( +- 0.074% ) 17.335292038 seconds time elapsed ( +- 0.076% ) [acme@doppio pahole]$ To: [acme@doppio pahole]$ perf stat -r 5 pahole object_samples/zweinberg\@mozilla.com/libgklayout.so > /dev/null Performance counter stats for 'pahole object_samples/zweinberg@mozilla.com/libgklayout.so' (5 runs): 16511.627334 task-clock-msecs # 0.992 CPUs ( +- 0.208% ) 1922 context-switches # 0.000 M/sec ( +- 3.068% ) 0 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 25570 page-faults # 0.002 M/sec ( +- 0.000% ) 32807624343 cycles # 1986.941 M/sec ( +- 0.208% ) 32711598374 instructions # 0.997 IPC ( +- 0.001% ) 436345377 cache-references # 26.427 M/sec ( +- 0.178% ) 54044997 cache-misses # 3.273 M/sec ( +- 0.685% ) 16.652951166 seconds time elapsed ( +- 0.304% ) [acme@doppio pahole]$ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2009-07-06 18:44:57 +02:00
conf_load.extra_dbg_info = true;
conf_load.get_addr_info = true; break;
case ARGP_symtab: symtab_name = arg ?: ".symtab"; break;
case ARGP_no_parm_names: conf.no_parm_names = 1; break;
case ARGP_compile:
expand_types = true;
type_emissions__init(&emissions);
compilable_output = true;
conf.no_semicolon = true;
conf.strip_inline = true;
if (arg)
function_name = arg;
break;
default: return ARGP_ERR_UNKNOWN;
}
return 0;
}
/* Usage text for the positional arguments shown in the help output. */
static const char pfunct__args_doc[] = "FILE";

/*
 * argp descriptor handed to argp_parse()/argp_help() in main(): it ties
 * together the option table, the per-option callback and the usage text.
 */
static struct argp pfunct__argp = {
	.args_doc = pfunct__args_doc,
	.parser	  = pfunct__options_parser,
	.options  = pfunct__options,
};
int main(int argc, char *argv[])
{
int err, remaining, rc = EXIT_FAILURE;
if (argp_parse(&pfunct__argp, argc, argv, 0, &remaining, NULL) ||
(remaining == argc && class_name == NULL && function_name == NULL)) {
argp_help(&pfunct__argp, stderr, ARGP_HELP_SEE, argv[0]);
goto out;
}
if (symtab_name != NULL)
return elf_symtabs__show(argv + remaining);
if (dwarves__init()) {
fputs("pfunct: insufficient memory\n", stderr);
goto out;
}
dwarves__resolve_cacheline_size(&conf_load, 0);
struct cus *cus = cus__new();
if (cus == NULL) {
fputs("pfunct: insufficient memory\n", stderr);
goto out_dwarves_exit;
}
pfunct: Use load stealer to speed up --class We were loading everything to then iterate looking for functions with pointers to the --class argument, do it in the stealer and go on ditching the already processed data, greatly speeding up the process. $ pfunct -c perf_event_attr security_perf_event_open register_user_hw_breakpoint modify_user_hw_breakpoint perf_event_create_kernel_counter register_wide_hw_breakpoint bpf_lsm_perf_event_open modify_user_hw_breakpoint_check perf_event_create_kernel_counter $ $ $ pfunct bpf_lsm_perf_event_open int bpf_lsm_perf_event_open(struct perf_event_attr * attr, int type); $ $ for function in `pfunct -c perf_event_attr` ; do pfunct $function ; done int security_perf_event_open(struct perf_event_attr * attr, int type); struct perf_event * register_user_hw_breakpoint(struct perf_event_attr * attr, perf_overflow_handler_t triggered, void * context, struct task_struct * tsk); int modify_user_hw_breakpoint(struct perf_event * bp, struct perf_event_attr * attr); struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr * attr, int cpu, struct task_struct * task, perf_overflow_handler_t callback, void * context); struct perf_event * * register_wide_hw_breakpoint(struct perf_event_attr * attr, perf_overflow_handler_t triggered, void * context); int bpf_lsm_perf_event_open(struct perf_event_attr * attr, int type); int modify_user_hw_breakpoint_check(struct perf_event * bp, struct perf_event_attr * attr, bool check); struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr * attr, int cpu, struct task_struct * task, perf_overflow_handler_t callback, void * context); $ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-11-16 12:15:23 +01:00
if (function_name || class_name)
pfunct: Use a load stealer to stop as soon as a function is found When --function/-f or just using the function name as the sole arg, greatly speeding up the process. Example using /sys/kernel/btf/vmlinux: Before: $ perf stat -r5 pfunct tcp_v4_rcv int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); Performance counter stats for 'pfunct tcp_v4_rcv' (5 runs): 13,199.77 msec task-clock:u # 1.000 CPUs utilized ( +- 0.27% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 9,426 page-faults:u # 0.714 K/sec ( +- 0.02% ) 57,793,399,298 cycles:u # 4.378 GHz ( +- 0.29% ) (83.33%) 305,498,117 stalled-cycles-frontend:u # 0.53% frontend cycles idle ( +- 2.87% ) (83.33%) 15,537,903,799 stalled-cycles-backend:u # 26.89% backend cycles idle ( +- 4.04% ) (83.33%) 126,344,414,608 instructions:u # 2.19 insn per cycle # 0.12 stalled cycles per insn ( +- 0.00% ) (83.33%) 26,880,839,847 branches:u # 2036.463 M/sec ( +- 0.01% ) (83.34%) 122,011,679 branch-misses:u # 0.45% of all branches ( +- 0.13% ) (83.33%) 13.2005 +- 0.0355 seconds time elapsed ( +- 0.27% ) $ After: $ perf stat -r5 pfunct tcp_v4_rcv int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); Performance counter stats for 'pfunct tcp_v4_rcv' (5 runs): 41.89 msec task-clock:u # 0.993 CPUs utilized ( +- 11.34% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 9,424 page-faults:u # 0.225 M/sec ( +- 0.01% ) 117,923,321 cycles:u # 2.815 GHz ( +- 2.13% ) (82.15%) 1,014,685 stalled-cycles-frontend:u # 0.86% frontend cycles idle ( +- 5.81% ) (83.48%) 37,728,636 stalled-cycles-backend:u # 31.99% backend cycles idle ( +- 4.77% ) (83.80%) 215,262,313 instructions:u # 1.83 insn per cycle # 0.18 stalled cycles per insn ( +- 0.96% 
) (83.77%) 36,786,262 branches:u # 878.162 M/sec ( +- 0.46% ) (83.80%) 338,322 branch-misses:u # 0.92% of all branches ( +- 2.10% ) (83.01%) 0.04220 +- 0.00478 seconds time elapsed ( +- 11.33% ) $ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-11-10 19:15:15 +01:00
conf_load.steal = pfunct_stealer;
try_sole_arg_as_function_name:
err = cus__load_files(cus, &conf_load, argv + remaining);
if (err != 0) {
if (function_name == NULL) {
function_name = argv[remaining];
if (access(function_name, R_OK) == 0) {
fprintf(stderr, "pfunct: file '%s' has no %s type information.\n",
function_name, conf_load.format_path ?: "supported");
goto out_dwarves_exit;
}
pfunct: Use a load stealer to stop as soon as a function is found When --function/-f or just using the function name as the sole arg, greatly speeding up the process. Example using /sys/kernel/btf/vmlinux: Before: $ perf stat -r5 pfunct tcp_v4_rcv int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); Performance counter stats for 'pfunct tcp_v4_rcv' (5 runs): 13,199.77 msec task-clock:u # 1.000 CPUs utilized ( +- 0.27% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 9,426 page-faults:u # 0.714 K/sec ( +- 0.02% ) 57,793,399,298 cycles:u # 4.378 GHz ( +- 0.29% ) (83.33%) 305,498,117 stalled-cycles-frontend:u # 0.53% frontend cycles idle ( +- 2.87% ) (83.33%) 15,537,903,799 stalled-cycles-backend:u # 26.89% backend cycles idle ( +- 4.04% ) (83.33%) 126,344,414,608 instructions:u # 2.19 insn per cycle # 0.12 stalled cycles per insn ( +- 0.00% ) (83.33%) 26,880,839,847 branches:u # 2036.463 M/sec ( +- 0.01% ) (83.34%) 122,011,679 branch-misses:u # 0.45% of all branches ( +- 0.13% ) (83.33%) 13.2005 +- 0.0355 seconds time elapsed ( +- 0.27% ) $ After: $ perf stat -r5 pfunct tcp_v4_rcv int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); int tcp_v4_rcv(struct sk_buff * skb); Performance counter stats for 'pfunct tcp_v4_rcv' (5 runs): 41.89 msec task-clock:u # 0.993 CPUs utilized ( +- 11.34% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 9,424 page-faults:u # 0.225 M/sec ( +- 0.01% ) 117,923,321 cycles:u # 2.815 GHz ( +- 2.13% ) (82.15%) 1,014,685 stalled-cycles-frontend:u # 0.86% frontend cycles idle ( +- 5.81% ) (83.48%) 37,728,636 stalled-cycles-backend:u # 31.99% backend cycles idle ( +- 4.77% ) (83.80%) 215,262,313 instructions:u # 1.83 insn per cycle # 0.18 stalled cycles per insn ( +- 0.96% 
) (83.77%) 36,786,262 branches:u # 878.162 M/sec ( +- 0.46% ) (83.80%) 338,322 branch-misses:u # 0.92% of all branches ( +- 2.10% ) (83.01%) 0.04220 +- 0.00478 seconds time elapsed ( +- 11.33% ) $ Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-11-10 19:15:15 +01:00
conf_load.steal = pfunct_stealer;
remaining = argc;
goto try_sole_arg_as_function_name;
}
cus__fprintf_load_files_err(cus, "pfunct", argv + remaining, err, stderr);
goto out_cus_delete;
}
cus__for_each_cu(cus, cu_unique_iterator, NULL, NULL);
if (addr) {
struct cu *cu;
struct function *f = cus__find_function_at_addr(cus, addr, &cu);
if (f == NULL) {
fprintf(stderr, "pfunct: No function found at %#llx!\n",
(unsigned long long)addr);
goto out_cus_delete;
}
function__show(f, cu);
} else if (show_total_inline_expansion_stats)
print_total_inline_stats();
else if (function_name != NULL || expand_types)
cus__for_each_cu(cus, cu_function_iterator,
function_name, NULL);
else
print_fn_stats(formatter);
rc = EXIT_SUCCESS;
out_cus_delete:
cus__delete(cus);
fn_stats__delete_list();
out_dwarves_exit:
dwarves__exit();
out:
return rc;
}