pahole: Add support for referencing header variables when pretty printing

To know from where to start reading some variable sized type we need
some information from a domain specific tool such as 'perf report -D'
for perf.data files, i.e.:

  $ perf report -D -i perf.data
  # To display the perf.data header info, please use --header/--header-only options.
  #

  0x130 [0x20]: event: 79
  .
  . ... raw event: size 32 bytes
  .  0000:  4f 00 00 00 00 00 20 00 1f 00 00 00 00 00 00 00  O..... .........
  .  0010:  31 30 9b 3c 00 00 00 00 2e 53 f8 0c 52 8c 01 00  10.<.....S..R...

  0 0x130 [0x20]: PERF_RECORD_TIME_CONV: unhandled!

So we see that 0x130 is where the first PERF_RECORD_* event is located,
and we can use that with --seek_bytes and with some extra info we can
decode variable sized records:

  $ pahole --seek_bytes=0x130 -C 'perf_event_header(sizeof=size,type=type,type_enum=perf_event_type)' --skip 2 --count 3 ~/bin/perf < perf.data
  {
  	.type = 0x4a,
  	.misc = 0,
  	.size = 0x20,
  },
  {
  	.header = {
  		.type = PERF_RECORD_CGROUP,
  		.misc = 0,
  		.size = 0x28,
  	},
  	.id = 0x1,
  	.path = "/",
  },
  {
  	.header = {
  		.type = PERF_RECORD_CGROUP,
  		.misc = 0,
  		.size = 0x30,
  	},
  	.id = 0x611,
  	.path = "/system.slice",
  },
  $

But if we decode the perf.data file header:

  $ pahole ~/bin/perf -C perf_file_header --count 1 < perf.data
  {
  	.magic = 0x32454c4946524550,
  	.size = 0x68,
  	.attr_size = 0x88,
  	.attrs = {
  		.offset = 0xa8,
  		.size = 0x88,
  	},
  	.data = {
  		.offset = 0x130,
  		.size = 0x588,
  	},
  	.event_types = {
  		.offset = 0,
  		.size = 0,
  	},
  	.adds_features = { 0x16717ffc, 0, 0, 0 },
  },
  $

We see that the 0x130 offset is stored in the perf_file_header->data.offset
field, so let's automate this: extract that value from the header and then
use it with --seek_bytes:

  $ pahole --header=perf_file_header --seek_bytes='$header.data.offset' -C 'perf_event_header(sizeof=size,type=type,type_enum=perf_event_type)' --skip 2 --count 3 ~/bin/perf < perf.data
  pahole: the type enum 'perf_event_type' wasn't found in 'util/header.c'
  $

The problem here is that pahole tries to avoid processing all the CUs
(compile units) in a binary, as, so far, it only needed to process one
main type at a time, i.e. the ones in -C/--class.

Now we need multiple types, in the above example we need:

  struct perf_event_header
  struct perf_file_header
  enum perf_event_type

And in this case, the perf binary doesn't have any object/CU that has
all these three types.

To see if the code works we can resort to using BTF, that combines all
types into just one "CU", deduplicating them in the process, so:

  $ pahole --btf_encode ~/bin/perf

And now it works:

  $ pahole -V -F btf --header=perf_file_header --seek_bytes='$header.data.offset' -C 'perf_event_header(sizeof=size,type=type,type_enum=perf_event_type)' --skip 2 --count 3 ~/bin/perf < perf.data
  pahole: sizeof_operator for 'perf_event_header' is 'size'
  pahole: type member for 'perf_event_header' is 'type'
  pahole: type enum for 'perf_event_header' is 'perf_event_type'
  pahole: seek bytes evaluated from --seek_bytes=$header.data.offset is 0x130
  {
  	.type = 0x4a,
  	.misc = 0,
  	.size = 0x20,
  },
  {
  	.header = {
  		.type = PERF_RECORD_CGROUP,
  		.misc = 0,
  		.size = 0x28,
  	},
  	.id = 0x1,
  	.path = "/",
  },
  {
  	.header = {
  		.type = PERF_RECORD_CGROUP,
  		.misc = 0,
  		.size = 0x30,
  	},
  	.id = 0x611,
  	.path = "/system.slice",
  },
  $

In the next csets a fallback approach will allow this to work even with
DWARF: when we notice that stdin wasn't consumed, we'll search for the
needed types with cus__find_struct_by_name() & friends.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Arnaldo Carvalho de Melo 2020-07-20 08:25:45 -03:00
parent 611f5e8bd7
commit 9310b04854
2 changed files with 155 additions and 7 deletions

View File

@ -57,7 +57,8 @@ struct conf_load {
*
* @count - Just like 'dd', stop pretty printing input after 'count' records
* @skip - Just like 'dd', skip 'count' records when pretty printing input
* @seek_bytes - Number of bytes to seek, if stdin only from start, when we have --pretty FILE, then from the end as well with negative numbers
* @seek_bytes - Number of bytes to seek, if stdin only from start, when we have --pretty FILE, then from the end as well with negative numbers,
* may be of the form $header.MEMBER_NAME when using with --header.
* @flat_arrays - a->foo[10][2] becomes a->foo[20]
* @classes_as_structs - class f becomes struct f, CTF doesn't have a "class"
* @cachelinep - pointer to current cacheline, so that when expanding types we keep track of it,
@ -74,7 +75,8 @@ struct conf_fprintf {
uint32_t base_offset;
uint32_t count;
uint32_t *cachelinep;
off_t seek_bytes;
const char *seek_bytes;
const char *header_type;
uint32_t skip;
uint8_t indent;
uint8_t expand_types:1;

156
pahole.c
View File

@ -806,6 +806,7 @@ ARGP_PROGRAM_VERSION_HOOK_DEF = dwarves_print_version;
#define ARGP_count 311
#define ARGP_skip 312
#define ARGP_seek_bytes 313
#define ARGP_header_type 314
static const struct argp_option pahole__options[] = {
{
@ -844,6 +845,12 @@ static const struct argp_option pahole__options[] = {
.arg = "BYTES",
.doc = "Seek COUNT input records"
},
{
.name = "header_type",
.key = ARGP_header_type,
.arg = "TYPE",
.doc = "File header type"
},
{
.name = "find_pointers_to",
.key = 'f',
@ -1179,7 +1186,9 @@ static error_t pahole__options_parser(int key, char *arg,
case ARGP_skip:
conf.skip = atoi(arg); break;
case ARGP_seek_bytes:
conf.seek_bytes = strtol(arg, NULL, 0); break;
conf.seek_bytes = arg; break;
case ARGP_header_type:
conf.header_type = arg; break;
default:
return ARGP_ERR_UNKNOWN;
}
@ -1537,6 +1546,88 @@ static struct tag *tag__real_type(struct tag *tag, struct cu *cu, void *instance
return tag;
}
/*
 * struct type_instance - a type together with the raw bytes of one instance of it
 *
 * @type - the type describing the layout of @instance
 * @instance - storage for the instance bytes, allocated together with this
 *	       struct; type_instance__new() sizes it as type->size bytes.
 *
 * @instance is a C99 flexible array member, so sizeof(struct type_instance)
 * covers only the fixed part.
 */
struct type_instance {
	struct type *type;
	char	    instance[];
};
/*
 * type_instance__new - allocate room for one instance of the type called @name
 *
 * @cu - compile unit where @name is looked up
 * @name - name of the type to instantiate
 *
 * Returns a type_instance with ->type set and type->size bytes of
 * uninitialized storage in ->instance, or NULL if the type lookup or the
 * allocation fails. Release it with type_instance__delete().
 */
static struct type_instance *type_instance__new(struct cu *cu, const char *name)
{
	struct tag *tag = cu__find_type_by_name(cu, name, false, NULL);
	struct type *type = tag__type(tag);

	if (!type)
		return NULL;

	/* Header plus payload in one allocation, payload is filled by the caller */
	struct type_instance *self = malloc(sizeof(*self) + type->size);

	if (self == NULL)
		return NULL;

	self->type = type;
	return self;
}
/*
 * type_instance__delete - release a type_instance obtained from type_instance__new()
 *
 * @instance - the instance to free, may be NULL, in which case nothing is done,
 *	       just like free(), so error paths can call this unconditionally.
 *
 * The ->type it points to is not freed here.
 */
static void type_instance__delete(struct type_instance *instance)
{
	if (instance == NULL)
		return;

	instance->type = NULL;
	free(instance);
}
/*
 * type_instance__int_value - read the value of a base type member of an instance
 *
 * @instance - the type and the raw bytes where the value is read from
 * @cu - compile unit used to look up members and resolve their types
 * @member_name_orig - member name, first tried as-is, then as a dot separated
 *		       path into nested types, e.g. "data.offset"
 *
 * Returns what base_type__value() produces for the member, or -1 when the
 * member can't be found, one of the intermediate types can't be resolved,
 * the member isn't a base type or memory for parsing the path can't be
 * allocated. Note that -1 is thus indistinguishable from a member that
 * really holds that value.
 */
static int64_t type_instance__int_value(struct type_instance *instance, struct cu *cu, const char *member_name_orig)
{
	struct class_member *member = type__find_member_by_name(instance->type, cu, member_name_orig);
	char *member_name_alloc = NULL;
	int64_t value = -1;
	int byte_offset = 0;

	if (!member) {
		/* Not a direct member, see if it is a path into nested types */
		if (strchr(member_name_orig, '.') == NULL)
			goto out;

		/* Work on a copy, as each '.' is overwritten while walking the path */
		member_name_alloc = strdup(member_name_orig);
		if (!member_name_alloc)
			goto out;

		char *member_name = member_name_alloc;
		struct type *type = instance->type;
		char *sep;

		while ((sep = strchr(member_name, '.')) != NULL) {
			/*
			 * Terminate the current component, otherwise paths with
			 * more than two components would look up "b.c" as a
			 * member name and fail.
			 */
			*sep = '\0';

			member = type__find_member_by_name(type, cu, member_name);
			if (!member)
				goto out;

			/* Accumulate the offsets of the enclosing members */
			byte_offset += member->byte_offset;

			type = tag__type(cu__type(cu, member->tag.type));
			if (type == NULL)
				goto out;

			member_name = sep + 1;
		}

		/* Last component, the member holding the value */
		member = type__find_member_by_name(type, cu, member_name);
		if (member == NULL)
			goto out;
	}

	byte_offset += member->byte_offset;

	struct tag *member_type = cu__type(cu, member->tag.type);

	/* Guard against a type id that can't be resolved before inspecting it */
	if (member_type == NULL || !tag__is_base_type(member_type, cu))
		goto out;

	value = base_type__value(&instance->instance[byte_offset], member->byte_size);
out:
	free(member_name_alloc);
	return value;
}
static int tag__stdio_fprintf_value(struct tag *type, struct cu *cu, FILE *fp)
{
int _sizeof = tag__size(type, cu), printed = 0;
@ -1548,10 +1639,62 @@ static int tag__stdio_fprintf_value(struct tag *type, struct cu *cu, FILE *fp)
if (instance == NULL)
return -ENOMEM;
if (conf.seek_bytes && pipe_seek(stdin, conf.seek_bytes) < 0) {
int err = --errno;
fprintf(stderr, "Couldn't --seek_bytes %ld\n", conf.seek_bytes);
return err;
if (conf.seek_bytes) {
off_t seek_bytes;
if (strstarts(conf.seek_bytes, "$header.")) {
if (!conf.header_type) {
fprintf(stderr, "pahole: --seek_bytes (%s) makes reference to --header but it wasn't specified\n",
conf.seek_bytes);
return -EINVAL;
}
struct type_instance *header = type_instance__new(cu, conf.header_type);
if (!header) {
fprintf(stderr, "pahole: --header (%s) type not found in %s\n", conf.header_type, cu->name);
return -ESRCH;
}
if (fread(header->instance, header->type->size, 1, stdin) != 1) {
int err = --errno;
fprintf(stderr, "pahole: --header (%s) type not be read\n", conf.header_type);
return err;
}
const char *member_name = conf.seek_bytes + sizeof("$header.") - 1;
int64_t value = type_instance__int_value(header, cu, member_name);
if (value < 0) {
fprintf(stderr, "pahole: couldn't read the '%s' member of '%s' for evaluating --seek_bytes=%s\n",
member_name, conf.header_type, conf.seek_bytes);
return -ESRCH;
}
seek_bytes = value;
if (global_verbose)
fprintf(stdout, "pahole: seek bytes evaluated from --seek_bytes=%s is %#" PRIx64 " \n",
conf.seek_bytes, seek_bytes);
if (seek_bytes < header->type->size) {
fprintf(stderr, "pahole: seek bytes evaluated from --seek_bytes=%s is less than the header type size\n",
conf.seek_bytes);
return -EINVAL;
}
// Since we're reading stdin, we need to account for already read header:
seek_bytes -= header->type->size;
type_instance__delete(header);
} else {
seek_bytes = strtol(conf.seek_bytes, NULL, 0);
}
if (pipe_seek(stdin, seek_bytes) < 0) {
int err = --errno;
fprintf(stderr, "Couldn't --seek_bytes %s (%" PRIu64 "\n", conf.seek_bytes, seek_bytes);
return err;
}
}
while (fread(instance, _sizeof, 1, stdin) == 1) {
@ -1788,6 +1931,9 @@ free_and_stop:
if (class == NULL)
continue; // couldn't find that class name in this CU, continue to the next one.
if (conf.header_type && !cu__find_type_by_name(cu, conf.header_type, false, NULL))
continue; // we need a CU with both the class and the header type
if (!tag__is_struct(class)) {
fprintf(stderr, "pahole: attributes are only supported with 'class' and 'struct' types\n");
free(name);