dwarves/pahole.c

720 lines
17 KiB
C
Raw Normal View History

/*
Copyright (C) 2006 Mandriva Conectiva S.A.
Copyright (C) 2006 Arnaldo Carvalho de Melo <acme@mandriva.com>
Copyright (C) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
This program is free software; you can redistribute it and/or modify it
under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation.
*/
#include <argp.h>
#include <stdio.h>
#include <dwarf.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include "dwarves_reorganize.h"
#include "dwarves.h"
/*
 * Filtering knobs consulted by tag__filter() and friends.  Nothing in the
 * visible part of this file assigns them; presumably the command line
 * parser does (TODO confirm).
 */

/*
 * Non-zero: tag__filter() lets structs without a name through, and
 * structures__find() resolves unnamed entries via their first typedef.
 */
static uint8_t class__include_anonymous;
/*
 * Non-zero: class_formatter() also prints anonymous structs for which no
 * typedef was found.
 */
static uint8_t class__include_nested_anonymous;
/*
 * tag__filter() drops structs whose name begins with this prefix; NULL
 * disables the check.  Only the first class__exclude_prefix_len characters
 * are compared (strncmp).
 */
static char *class__exclude_prefix;
static size_t class__exclude_prefix_len;
/*
 * cu__filter() drops compile units whose name begins with this prefix (or
 * that have no name at all); NULL disables the check.
 */
static char *cu__exclude_prefix;
static size_t cu__exclude_prefix_len;
/*
 * tag__filter() drops structs whose decl_file begins with this prefix (or
 * that have no decl_file); NULL disables the check.
 */
static char *decl_exclude_prefix;
static size_t decl_exclude_prefix_len;
/* tag__filter() drops structs with fewer than this many holes. */
static uint16_t nr_holes;
/* tag__filter() drops structs with fewer than this many bit holes. */
static uint16_t nr_bit_holes;
[LIB]: Introduce class__has_hole_ge() That returns if the class has a hole greater or equal to the size specified. Pahole now has a --hole_size_ge command line option to use it. Example on a linux kernel built for x86_64 where we list the structs that have holes bigger than 32 bytes, that provides an approximation of structs with ____cacheline_aligned_in_smp annotated members: [acme@filo pahole]$ pahole --hole_size_ge 32 examples/vmlinux-x86_64 inet_hashinfo rcu_ctrlblk hh_cache net_device files_struct module zone For instance, look at struct zone clever use of such construct: _pad1_ is defined with ZONE_PADDING(_pad1_), that is: /* <40e> /home/acme/git/net-2.6.22/include/linux/mmzone.h:179 */ struct zone { long unsigned int pages_min; /* 0 8 */ long unsigned int pages_low; /* 8 8 */ long unsigned int pages_high; /* 16 8 */ long unsigned int lowmem_reserve[3]; /* 24 24 */ int node; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ long unsigned int min_unmapped_pages; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ long unsigned int min_slab_pages; /* 64 8 */ struct per_cpu_pageset * pageset[255]; /* 72 2040 */ /* --- cacheline 33 boundary (2112 bytes) --- */ spinlock_t lock; /* 2112 4 */ /* XXX 4 bytes hole, try to pack */ struct free_area free_area[11]; /* 2120 264 */ /* XXX 48 bytes hole, try to pack */ /* --- cacheline 38 boundary (2432 bytes) --- */ struct zone_padding _pad1_; /* 2432 0 */ spinlock_t lru_lock; /* 2432 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head active_list; /* 2440 16 */ struct list_head inactive_list; /* 2456 16 */ long unsigned int nr_scan_active; /* 2472 8 */ long unsigned int nr_scan_inactive; /* 2480 8 */ long unsigned int pages_scanned; /* 2488 8 */ /* --- cacheline 39 boundary (2496 bytes) --- */ int all_unreclaimable; /* 2496 4 */ atomic_t reclaim_in_progress; /* 2500 4 */ atomic_long_t vm_stat[20]; /* 2504 160 */ /* --- cacheline 41 boundary (2624 bytes) was 40 bytes ago --- */ int prev_priority; /* 2664 4 */ /* 
XXX 20 bytes hole, try to pack */ /* --- cacheline 42 boundary (2688 bytes) --- */ struct zone_padding _pad2_; /* 2688 0 */ wait_queue_head_t * wait_table; /* 2688 8 */ long unsigned int wait_table_hash_nr_entries; /* 2696 8 */ long unsigned int wait_table_bits; /* 2704 8 */ struct pglist_data * zone_pgdat; /* 2712 8 */ long unsigned int zone_start_pfn; /* 2720 8 */ long unsigned int spanned_pages; /* 2728 8 */ long unsigned int present_pages; /* 2736 8 */ const char * name; /* 2744 8 */ /* --- cacheline 43 boundary (2752 bytes) --- */ }; /* size: 2752, cachelines: 43 */ /* sum members: 2672, holes: 5, sum holes: 80 */ /* definitions: 933 */ Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 18:32:53 +02:00
/*
 * When non-zero, tag__filter() keeps only structs for which
 * class__has_hole_ge(class, hole_size_ge) is true.
 */
static uint16_t hole_size_ge;
/*
 * When non-zero, tag__filter() keeps only structs accepted by
 * class__packable(), and print_classes() prints a size/new-size/savings
 * row per struct instead of calling the formatter (unless global_verbose
 * is also set).
 */
static uint8_t show_packable;
[PAHOLE]: Reorganize bitfields This cset also does a fixup for cases where the compiler keeps the type specified by the programmer for a bitfield but uses less space to combine with the next, non-bitfield member, these cases can be caught using plain pahole and will appear with this comment: /* --- cacheline 1 boundary (64 bytes) --- */ int bitfield1:1; /* 64 4 */ int bitfield2:1; /* 64 4 */ /* XXX 14 bits hole, try to pack */ /* Bitfield WARNING: DWARF size=4, real size=2 */ short int d; /* 66 2 */ The fixup is done prior to reorganizing the fields. Now an example of this code in action: [acme@filo examples]$ cat swiss_cheese.c <SNIP> struct cheese { char id; short number; char name[52]; int a:1; int b; int bitfield1:1; int bitfield2:1; short d; short e; short last:5; }; <SNIP> [acme@filo examples]$ Lets look at the layout: [acme@filo examples]$ pahole swiss_cheese cheese /* <11b> /home/acme/git/pahole/examples/swiss_cheese.c:3 */ struct cheese { char id; /* 0 1 */ /* XXX 1 byte hole, try to pack */ short int number; /* 2 2 */ char name[52]; /* 4 52 */ int a:1; /* 56 4 */ /* XXX 31 bits hole, try to pack */ int b; /* 60 4 */ /* --- cacheline 1 boundary (64 bytes) --- */ int bitfield1:1; /* 64 4 */ int bitfield2:1; /* 64 4 */ /* XXX 14 bits hole, try to pack */ /* Bitfield WARNING: DWARF size=4, real size=2 */ short int d; /* 66 2 */ short int e; /* 68 2 */ short int last:5; /* 70 2 */ }; /* size: 72, cachelines: 2 */ /* sum members: 71, holes: 1, sum holes: 1 */ /* bit holes: 2, sum bit holes: 45 bits */ /* bit_padding: 11 bits */ /* last cacheline: 8 bytes */ [acme@filo examples]$ Full of holes, has bit padding and uses more than one 64 bytes cacheline. Now lets ask pahole to reorganize it: [acme@filo examples]$ pahole --reorganize --verbose swiss_cheese cheese /* Demoting bitfield ('a' ... 'a') from 'int' to 'unsigned char' */ /* Demoting bitfield ('bitfield1' ... 
'bitfield2') from 'short unsigned int' to 'unsigned char' */ /* Demoting bitfield ('last') from 'short int' to 'unsigned char' */ /* Moving 'bitfield2:1' from after 'bitfield1' to after 'a:1' */ /* Moving 'bitfield1:1' from after 'b' to after 'bitfield2:1' */ /* Moving 'last:5' from after 'e' to after 'bitfield1:1' */ /* Moving bitfield('a' ... 'last') from after 'name' to after 'id' */ /* Moving 'e' from after 'd' to after 'b' */ /* <11b> /home/acme/git/pahole/examples/swiss_cheese.c:3 */ struct cheese { char id; /* 0 1 */ unsigned char a:1; /* 1 1 */ unsigned char bitfield2:1; /* 1 1 */ unsigned char bitfield1:1; /* 1 1 */ unsigned char last:5; /* 1 1 */ short int number; /* 2 2 */ char name[52]; /* 4 52 */ int b; /* 56 4 */ short int e; /* 60 2 */ short int d; /* 62 2 */ /* --- cacheline 1 boundary (64 bytes) --- */ }; /* size: 64, cachelines: 1 */ /* saved 8 bytes and 1 cacheline! */ [acme@filo examples]$ Instant karma, it gets completely packed, and look ma, no __attribute__((packed)) :-) With this struct task_struct in the linux kernel is shrunk by 12 bytes, there is more 4 bytes to save with another technique that involves not combining holes, but using the last single hole to fill it with members at the tail of the struct. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2007-02-01 13:51:16 +01:00
/*
 * Verbosity switch.  In the code visible here it enables the duplicate
 * definition report in tag__filter(), the member count printed by
 * print_containers(), and makes print_classes() use the formatter even
 * when show_packable is set.
 */
static uint8_t global_verbose;
[PAHOLE]: Introduce --recursive For now only affects the --contains output. Example showing the structs that include struct list_head in a linux kernel module: [acme@filo pahole]$ pahole --recursive --contains list_head examples/ipv6.ko.debug.x86-64 inet_protosw proto sock_iocb key_type msg_queue msg_msg nf_hook_ops softnet_data net_device softnet_data dma_device dma_client dma_chan class_device net_device softnet_data dma_chan class klist_node device_driver device klist device_driver bus_type device file_system_type nfs_lock_info file_lock block_device address_space inode dquot mem_dqinfo super_block inode signal_struct page kioctx file kiocb work_struct delayed_work kioctx timer_list ifmcaddr6 inet6_dev inet6_ifaddr neigh_table neighbour net_device softnet_data sock inet_sock delayed_work kioctx plist_head task_struct sigpending signal_struct task_struct user_struct device dev_pm_info device mutex_waiter mutex seq_file block_device quota_info super_block dquot super_block inode zone per_cpu_pages free_area kset bus_type subsystem class bus_type __wait_queue_head __wait_queue rw_semaphore quota_info super_block super_block inode key blocking_notifier_head bus_type subsystem class bus_type mm_struct dentry vm_area_struct kobject class_device net_device softnet_data dma_chan device_driver module_kobject module device kset bus_type subsystem class bus_type lock_class module mm_struct task_struct Handling in multi-cu objects is not very precise, as the same struct has different dwarf offsets (id) in each CU. A mitigation for this problem will be provided with the --cu_list and --cu_name upcoming options, where one will be able to get a list of the object files in a, for instance, linux kernel .ko module and also to specify a cu name to be the only to be considered when processing multi-cu files (again, such as .ko linux kernel modules). 
This ends up being also useful to generate a reverse class hierarchy :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 00:11:51 +02:00
/* Non-zero: print_containers() descends into the containers of each hit. */
static uint8_t recursive;
/*
 * The following five settings are not referenced in the part of the file
 * visible here; their meaning is presumably given by the matching command
 * line options (TODO confirm against the option parser).
 */
static size_t cacheline_size;
static uint8_t find_containers;
static int reorganize;
static int show_reorg_steps;
static char *class_name;
/* Character placed between columns by the tabular formatters; TAB by default. */
static char separator = '\t';
/*
 * Printing configuration handed to tag__fprintf() by class_formatter(),
 * which rewrites .prefix and .suffix for every class it prints.
 * NOTE(review): the effect of .emit_stats is defined by the library, not
 * shown here.
 */
struct conf_fprintf conf = {
.emit_stats = 1,
};
/*
 * One entry per distinct struct recorded on structures__list.
 */
struct structure {
/* Linkage on structures__list. */
struct list_head node;
/* The class as first seen, and the compile unit it was seen in. */
const struct class *class;
const struct cu *cu;
/*
 * Starts at 1 in structure__new(); tag__filter() bumps it each time a
 * struct with the same name shows up again.
 */
uint32_t nr_files;
/*
 * Starts at 0; nr_methods_iterator() bumps it once per function parameter
 * that is a pointer to this struct.
 */
uint32_t nr_methods;
};
/*
 * structure__new - allocate the bookkeeping entry for a class.
 * @class: class being recorded
 * @cu: compile unit @class was found in
 *
 * The definition counter starts at one (this very definition) and the
 * method counter at zero.  The list node is left untouched here; the
 * caller is expected to link the entry (structures__add() does so).
 *
 * Returns the new entry, or NULL when malloc() fails.
 */
static struct structure *structure__new(const struct class *class,
					const struct cu *cu)
{
	struct structure *entry = malloc(sizeof(*entry));

	if (entry == NULL)
		return NULL;

	entry->class = class;
	entry->cu = cu;
	entry->nr_files = 1;
	entry->nr_methods = 0;

	return entry;
}
/*
 * All struct structure entries recorded so far; filled by structures__add()
 * and walked by structures__find(), print_classes() and print_containers().
 */
static LIST_HEAD(structures__list);
/*
 * structures__find - look up a recorded structure by name.
 * @name: name to search for; NULL never matches
 *
 * Entries whose class has a name are compared directly.  Entries without
 * one are only considered when class__include_anonymous is set, in which
 * case the name of their first typedef in the owning CU is compared
 * instead; unnamed entries with no (named) typedef are skipped.
 *
 * Returns the first matching entry on structures__list, or NULL.
 */
static struct structure *structures__find(const char *name)
{
	struct structure *entry;

	if (name == NULL)
		return NULL;

	list_for_each_entry(entry, &structures__list, node) {
		const struct class *class = entry->class;
		const char *candidate = class__name(class);

		if (candidate == NULL) {
			const struct tag *alias;

			if (!class__include_anonymous)
				continue;

			alias = cu__find_first_typedef_of_type(entry->cu,
							       class__tag(class)->id);
			if (alias == NULL)
				continue;

			candidate = class__name(tag__class(alias));
			if (candidate == NULL)
				continue;
		}

		if (strcmp(candidate, name) == 0)
			return entry;
	}

	return NULL;
}
/*
 * structures__add - record @class (found in @cu) on structures__list.
 *
 * An allocation failure is silently ignored: the class is simply not
 * recorded.
 */
static void structures__add(const struct class *class, const struct cu *cu)
{
	struct structure *entry = structure__new(class, cu);

	if (entry == NULL)
		return;

	list_add(&entry->node, &structures__list);
}
/*
 * Formatter: "<class name><separator><number of definitions seen>".
 */
static void nr_definitions_formatter(const struct structure *self)
{
	const char *name = class__name(self->class);
	const uint32_t nr_definitions = self->nr_files;

	printf("%s%c%u\n", name, separator, nr_definitions);
}
/*
 * Formatter: "<class name><separator><number of members>".
 */
static void nr_members_formatter(const struct structure *self)
{
	const struct class *class = self->class;
	const char *name = class__name(class);

	printf("%s%c%u\n", name, separator, class__nr_members(class));
}
/*
 * Formatter: "<class name><separator><method count>", where the count is
 * whatever nr_methods_iterator() accumulated in the entry.
 */
static void nr_methods_formatter(const struct structure *self)
{
	const char *name = class__name(self->class);
	const uint32_t nr_methods = self->nr_methods;

	printf("%s%c%u\n", name, separator, nr_methods);
}
static void size_formatter(const struct structure *self)
{
printf("%s%c%zd%c%u\n", class__name(self->class), separator,
class__size(self->class), separator,
self->class->nr_holes);
}
/*
 * Formatter: "<class name><separator><length of the name>".
 *
 * class__name() yields NULL for anonymous structs (other code in this file
 * checks for that), and strlen(NULL) is undefined, so nameless entries are
 * skipped: there is no name whose length could be reported.  The length is
 * a size_t and is therefore printed with %zu.
 */
static void class_name_len_formatter(const struct structure *self)
{
	const char *name = class__name(self->class);

	if (name == NULL)
		return;

	printf("%s%c%zu\n", name, separator, strlen(name));
}
[LIB]: Introduce class__has_hole_ge() That returns if the class has a hole greater or equal to the size specified. Pahole now has a --hole_size_ge command line option to use it. Example on a linux kernel built for x86_64 where we list the structs that have holes bigger than 32 bytes, that provides an approximation of structs with ____cacheline_aligned_in_smp annotated members: [acme@filo pahole]$ pahole --hole_size_ge 32 examples/vmlinux-x86_64 inet_hashinfo rcu_ctrlblk hh_cache net_device files_struct module zone For instance, look at struct zone clever use of such construct: _pad1_ is defined with ZONE_PADDING(_pad1_), that is: /* <40e> /home/acme/git/net-2.6.22/include/linux/mmzone.h:179 */ struct zone { long unsigned int pages_min; /* 0 8 */ long unsigned int pages_low; /* 8 8 */ long unsigned int pages_high; /* 16 8 */ long unsigned int lowmem_reserve[3]; /* 24 24 */ int node; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ long unsigned int min_unmapped_pages; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ long unsigned int min_slab_pages; /* 64 8 */ struct per_cpu_pageset * pageset[255]; /* 72 2040 */ /* --- cacheline 33 boundary (2112 bytes) --- */ spinlock_t lock; /* 2112 4 */ /* XXX 4 bytes hole, try to pack */ struct free_area free_area[11]; /* 2120 264 */ /* XXX 48 bytes hole, try to pack */ /* --- cacheline 38 boundary (2432 bytes) --- */ struct zone_padding _pad1_; /* 2432 0 */ spinlock_t lru_lock; /* 2432 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head active_list; /* 2440 16 */ struct list_head inactive_list; /* 2456 16 */ long unsigned int nr_scan_active; /* 2472 8 */ long unsigned int nr_scan_inactive; /* 2480 8 */ long unsigned int pages_scanned; /* 2488 8 */ /* --- cacheline 39 boundary (2496 bytes) --- */ int all_unreclaimable; /* 2496 4 */ atomic_t reclaim_in_progress; /* 2500 4 */ atomic_long_t vm_stat[20]; /* 2504 160 */ /* --- cacheline 41 boundary (2624 bytes) was 40 bytes ago --- */ int prev_priority; /* 2664 4 */ /* 
XXX 20 bytes hole, try to pack */ /* --- cacheline 42 boundary (2688 bytes) --- */ struct zone_padding _pad2_; /* 2688 0 */ wait_queue_head_t * wait_table; /* 2688 8 */ long unsigned int wait_table_hash_nr_entries; /* 2696 8 */ long unsigned int wait_table_bits; /* 2704 8 */ struct pglist_data * zone_pgdat; /* 2712 8 */ long unsigned int zone_start_pfn; /* 2720 8 */ long unsigned int spanned_pages; /* 2728 8 */ long unsigned int present_pages; /* 2736 8 */ const char * name; /* 2744 8 */ /* --- cacheline 43 boundary (2752 bytes) --- */ }; /* size: 2752, cachelines: 43 */ /* sum members: 2672, holes: 5, sum holes: 80 */ /* definitions: 933 */ Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 18:32:53 +02:00
/*
 * Formatter: just the class name, one per line.
 */
static void class_name_formatter(const struct structure *self)
{
	const char *name = class__name(self->class);

	puts(name);
}
/*
 * Formatter: print the full definition of the class through tag__fprintf(),
 * followed by a comment with the number of definitions seen and an empty
 * line.
 *
 * Anonymous structs are printed as "typedef ... <alias>" using their first
 * typedef in the CU; one typedef is enough, since a layout change affects
 * every alias alike.  An anonymous struct with no typedef at all lives
 * inside some other struct and gets printed along with that one, so it is
 * skipped here unless class__include_nested_anonymous is set.
 *
 * Side effect: conf.prefix and conf.suffix are overwritten on every call
 * that prints something.
 */
static void class_formatter(const struct structure *self)
{
	struct tag *tag = class__tag(self->class);
	struct tag *alias = NULL;

	if (class__name(self->class) == NULL) {
		alias = cu__find_first_typedef_of_type(self->cu, tag->id);

		if (alias == NULL) {
			if (!class__include_nested_anonymous)
				return;
		}
	}

	if (alias == NULL) {
		conf.prefix = conf.suffix = NULL;
	} else {
		const struct type *alias_type = tag__type(alias);

		conf.prefix = "typedef";
		conf.suffix = type__name(alias_type);
	}

	tag__fprintf(tag, self->cu, &conf, stdout);
	printf(" /* definitions: %u */\n", self->nr_files);
	putchar('\n');
}
static void print_classes(void (*formatter)(const struct structure *s))
{
struct structure *pos;
list_for_each_entry(pos, &structures__list, node)
if (show_packable && !global_verbose) {
const struct class *c = pos->class;
const struct tag *t = class__tag(c);
const size_t orig_size = class__size(c);
const size_t new_size = class__size(c->priv);
const size_t savings = orig_size - new_size;
const char *name = class__name(c);
/* Anonymous struct? Try finding a typedef */
if (name == NULL) {
const struct tag *tdef =
cu__find_first_typedef_of_type(pos->cu,
t->id);
if (tdef != NULL)
name = class__name(tag__class(tdef));
}
if (name != NULL)
printf("%s%c%zd%c%zd%c%zd\n",
name, separator,
orig_size, separator,
new_size, separator,
savings);
else
printf("%s(%d)%c%zd%c%zd%c%zd\n",
t->decl_file, t->decl_line, separator,
orig_size, separator,
new_size, separator,
savings);
} else
formatter(pos);
}
/*
 * cu__filter - decide whether a compile unit should be processed.
 *
 * With no cu__exclude_prefix configured every CU passes.  Otherwise a CU
 * is rejected when it has no name or when its name starts with the prefix
 * (first cu__exclude_prefix_len characters compared).
 *
 * Returns @cu when it passes, NULL when it is filtered out.
 */
static struct cu *cu__filter(struct cu *cu)
{
	if (cu__exclude_prefix == NULL)
		return cu;

	if (cu->name == NULL)
		return NULL;

	if (strncmp(cu__exclude_prefix, cu->name,
		    cu__exclude_prefix_len) == 0)
		return NULL;

	return cu;
}
/*
 * class__packable - check whether reorganizing @self changes its size.
 *
 * Classes with neither holes nor bit holes are rejected right away.
 * Otherwise a clone is reorganized and its size compared with the
 * original.  When the sizes differ the clone is kept in self->priv (where
 * print_classes() reads the new size from) and 1 is returned; when they
 * are equal the clone is deleted.
 *
 * Returns 1 if the sizes differ, 0 otherwise (including when cloning
 * fails).
 */
static int class__packable(struct class *self, const struct cu *cu)
{
	struct class *reorganized;

	if (self->nr_holes == 0 && self->nr_bit_holes == 0)
		return 0;

	reorganized = class__clone(self, NULL);
	if (reorganized == NULL)
		return 0;

	class__reorganize(reorganized, cu, 0, stdout);

	if (class__size(self) == class__size(reorganized)) {
		class__delete(reorganized);
		return 0;
	}

	self->priv = reorganized;
	return 1;
}
/*
 * class__dupmsg - report one difference between two definitions of a class.
 * @self, @cu: definition seen first and its compile unit
 * @dup, @dup_cu: later definition (unused) and its compile unit
 * @hdr: in/out flag; while it is zero the report header (class name and
 *       both CU names) is printed first.  Set to 1 on return.
 * @fmt: printf-style format for the difference, followed by its arguments
 */
static void class__dupmsg(const struct class *self, const struct cu *cu,
			  const struct class *dup __unused,
			  const struct cu *dup_cu,
			  char *hdr, const char *fmt, ...)
{
	va_list ap;
	const int header_pending = (*hdr == 0);

	if (header_pending)
		printf("class: %s\nfirst: %s\ncurrent: %s\n",
		       class__name(self), cu->name, dup_cu->name);

	*hdr = 1;

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);
}
/*
 * class__chkdupdef - compare two definitions of the same-named class.
 * @self, @cu: definition recorded first and its compile unit
 * @dup, @dup_cu: definition found later and its compile unit
 *
 * Prints, via class__dupmsg(), one line for each of size, number of
 * members, holes, bit holes and padding that differs, followed by an empty
 * line if anything was reported.
 *
 * Arguments travel through a variadic call, so each one is given exactly
 * the type its conversion specifier expects: sizes are converted to size_t
 * and printed with %zu, the remaining counters are converted to unsigned
 * int for %u.
 */
static void class__chkdupdef(const struct class *self, const struct cu *cu,
			     struct class *dup, const struct cu *dup_cu)
{
	const size_t self_size = class__size(self);
	const size_t dup_size = class__size(dup);
	char hdr = 0;

	if (self_size != dup_size)
		class__dupmsg(self, cu, dup, dup_cu,
			      &hdr, "size: %zu != %zu\n",
			      self_size, dup_size);

	if (class__nr_members(self) != class__nr_members(dup))
		class__dupmsg(self, cu, dup, dup_cu,
			      &hdr, "nr_members: %u != %u\n",
			      (unsigned int)class__nr_members(self),
			      (unsigned int)class__nr_members(dup));

	if (self->nr_holes != dup->nr_holes)
		class__dupmsg(self, cu, dup, dup_cu,
			      &hdr, "nr_holes: %u != %u\n",
			      (unsigned int)self->nr_holes,
			      (unsigned int)dup->nr_holes);

	if (self->nr_bit_holes != dup->nr_bit_holes)
		class__dupmsg(self, cu, dup, dup_cu,
			      &hdr, "nr_bit_holes: %u != %u\n",
			      (unsigned int)self->nr_bit_holes,
			      (unsigned int)dup->nr_bit_holes);

	if (self->padding != dup->padding)
		class__dupmsg(self, cu, dup, dup_cu,
			      &hdr, "padding: %u != %u\n",
			      (unsigned int)self->padding,
			      (unsigned int)dup->padding);

	/* XXX put more checks here: member types, member ordering, etc */

	/* hdr is set by class__dupmsg() as soon as anything was reported. */
	if (hdr)
		putchar('\n');
}
/*
 * tag__filter - select the struct definitions that get recorded.
 * @tag: tag being visited
 * @cu: compile unit @tag belongs to
 * @cookie: unused
 *
 * Filter callback passed to cu__for_each_tag() by cu_unique_iterator().
 * A tag is rejected (NULL returned) when any of the following holds:
 *  - it is not a DW_TAG_structure_type, or is only a declaration;
 *  - it has no name and class__include_anonymous is not set;
 *  - its name starts with class__exclude_prefix;
 *  - it has no decl_file, or decl_file starts with decl_exclude_prefix
 *    (only checked when that prefix is configured);
 *  - it has fewer holes than nr_holes or fewer bit holes than
 *    nr_bit_holes, or hole_size_ge is set and no hole is that large;
 *  - a struct of the same name is already recorded: its nr_files counter
 *    is bumped (after an optional consistency report when global_verbose
 *    is set) and the duplicate is dropped;
 *  - show_packable is set and class__packable() says it cannot be packed.
 *
 * Side effect: class__find_holes() is run on every struct that gets past
 * the name and declaration-file checks.
 *
 * NOTE(review): for anonymous structs the typedef name is looked up only
 * when class__exclude_prefix is configured, so whether the duplicate lookup
 * below sees a name for them depends on that option; confirm this is
 * intended.
 *
 * Returns @tag if it should be recorded, NULL otherwise.
 */
static struct tag *tag__filter(struct tag *tag, struct cu *cu,
			       void *cookie __unused)
{
	struct structure *str;
	struct class *class;
	const char *name;

	if (tag->tag != DW_TAG_structure_type)
		return NULL;

	class = tag__class(tag);
	name = class__name(class);

	if (class__is_declaration(class))
		return NULL;

	if (!class__include_anonymous && name == NULL)
		return NULL;

	if (class__exclude_prefix != NULL) {
		if (name == NULL) {
			/* Anonymous struct: judge it by its first typedef. */
			const struct tag *tdef =
				cu__find_first_typedef_of_type(cu, tag->id);
			if (tdef != NULL)
				name = class__name(tag__class(tdef));
		}
		if (name != NULL && strncmp(class__exclude_prefix, name,
					    class__exclude_prefix_len) == 0)
			return NULL;
	}

	if (decl_exclude_prefix != NULL &&
	    (tag->decl_file == NULL ||
	     strncmp(decl_exclude_prefix, tag->decl_file,
		     decl_exclude_prefix_len) == 0))
		return NULL;

	/* Hole accounting must be done before the hole based filters. */
	class__find_holes(class, cu);

	if (class->nr_holes < nr_holes ||
	    class->nr_bit_holes < nr_bit_holes ||
	    (hole_size_ge != 0 && !class__has_hole_ge(class, hole_size_ge)))
		return NULL;

	str = structures__find(name);
	if (str != NULL) {
		/* Already recorded: count the definition, don't add it again. */
		if (global_verbose)
			class__chkdupdef(str->class, str->cu, class, cu);
		str->nr_files++;
		return NULL;
	}

	if (show_packable && !class__packable(class, cu))
		return NULL;

	return tag;
}
/*
 * Per-tag callback: record the class behind @tag, found in @cu, on the
 * structures list.  Always returns 0.
 */
static int unique_iterator(struct tag *tag, struct cu *cu,
			   void *cookie __unused)
{
	const struct class *class = tag__class(tag);

	structures__add(class, cu);

	return 0;
}
/*
 * Per-CU callback: run unique_iterator() over the tags of @cu that pass
 * tag__filter(), handing @cookie through.  Returns whatever
 * cu__for_each_tag() returns.
 */
static int cu_unique_iterator(struct cu *cu, void *cookie)
{
	const int ret = cu__for_each_tag(cu, unique_iterator, cookie,
					 tag__filter);

	return ret;
}
/*
 * Filter callback: accept only DW_TAG_subprogram tags whose function is
 * not declared inline.  Returns @tag when accepted, NULL otherwise.
 */
static struct tag *nr_methods__filter(struct tag *tag, struct cu *cu __unused,
				      void *cookie __unused)
{
	if (tag->tag == DW_TAG_subprogram &&
	    !function__declared_inline(tag__function(tag)))
		return tag;

	return NULL;
}
/*
 * Per-function callback: walk the parameters of the function behind @tag
 * and, for each one that is a pointer to a named struct recorded on the
 * structures list, bump that struct's nr_methods counter.
 *
 * The count is per parameter: a function taking two pointers to the same
 * struct bumps its counter twice.  Always returns 0.
 */
static int nr_methods_iterator(struct tag *tag, struct cu *cu,
			       void *cookie __unused)
{
	struct parameter *parm;

	list_for_each_entry(parm, &tag__ftype(tag)->parms, tag.node) {
		struct tag *ptr = cu__find_tag_by_id(cu,
						     parameter__type(parm, cu));
		struct tag *pointee;
		struct structure *entry;
		const char *name;

		if (ptr == NULL || ptr->tag != DW_TAG_pointer_type)
			continue;

		pointee = cu__find_tag_by_id(cu, ptr->type);
		if (pointee == NULL || pointee->tag != DW_TAG_structure_type)
			continue;

		name = type__name(tag__type(pointee));
		if (name == NULL)
			continue;

		entry = structures__find(name);
		if (entry != NULL)
			++entry->nr_methods;
	}

	return 0;
}
/*
 * Per-CU callback: run nr_methods_iterator() over the tags of @cu accepted
 * by nr_methods__filter(), handing @cookie through.  Returns whatever
 * cu__for_each_tag() returns.
 */
static int cu_nr_methods_iterator(struct cu *cu, void *cookie)
{
	const int ret = cu__for_each_tag(cu, nr_methods_iterator, cookie,
					 nr_methods__filter);

	return ret;
}
[PAHOLE]: Introduce --recursive For now only affects the --contains output. Example showing the structs that include struct list_head in a linux kernel module: [acme@filo pahole]$ pahole --recursive --contains list_head examples/ipv6.ko.debug.x86-64 inet_protosw proto sock_iocb key_type msg_queue msg_msg nf_hook_ops softnet_data net_device softnet_data dma_device dma_client dma_chan class_device net_device softnet_data dma_chan class klist_node device_driver device klist device_driver bus_type device file_system_type nfs_lock_info file_lock block_device address_space inode dquot mem_dqinfo super_block inode signal_struct page kioctx file kiocb work_struct delayed_work kioctx timer_list ifmcaddr6 inet6_dev inet6_ifaddr neigh_table neighbour net_device softnet_data sock inet_sock delayed_work kioctx plist_head task_struct sigpending signal_struct task_struct user_struct device dev_pm_info device mutex_waiter mutex seq_file block_device quota_info super_block dquot super_block inode zone per_cpu_pages free_area kset bus_type subsystem class bus_type __wait_queue_head __wait_queue rw_semaphore quota_info super_block super_block inode key blocking_notifier_head bus_type subsystem class bus_type mm_struct dentry vm_area_struct kobject class_device net_device softnet_data dma_chan device_driver module_kobject module device kset bus_type subsystem class bus_type lock_class module mm_struct task_struct Handling in multi-cu objects is not very precise, as the same struct has different dwarf offsets (id) in each CU. A mitigation for this problem will be provided with the --cu_list and --cu_name upcoming options, where one will be able to get a list of the object files in a, for instance, linux kernel .ko module and also to specify a cu name to be the only to be considered when processing multi-cu files (again, such as .ko linux kernel modules). 
This ends up being also useful to generate a reverse class hierarchy :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 00:11:51 +02:00
/*
 * Indentation source for print_containers(), which prints its first
 * (ident * 2) characters via "%.*s".  Nothing visible here fills it;
 * presumably it is set to blanks before use (TODO confirm), otherwise the
 * zero-initialised buffer yields no indentation at all.
 */
static char tab[128];
/*
 * print_containers - list the recorded structs that have members of type @s.
 * @s: structure whose containers are wanted
 * @ident: nesting level; each level indents the output by two characters
 *         taken from tab[]
 *
 * Every entry on structures__list with at least one member whose type is
 * @s's tag id is printed on its own line, followed by ": <count>" when
 * global_verbose is set.  With `recursive' set, the containers of each hit
 * are printed right below it, one level deeper.
 *
 * Matching is done on the tag id of @s, compared against the members of
 * every recorded struct, whichever compile unit it came from.
 */
static void print_containers(const struct structure *s, int ident)
{
	struct structure *pos;
	const Dwarf_Off type = s->class->type.namespace.tag.id;

	list_for_each_entry(pos, &structures__list, node) {
		const struct class *c = pos->class;
		const uint32_t n = type__nr_members_of_type(&c->type, type);

		if (n != 0) {
			/* "%.*s" stops at a NUL, so it never reads past tab[]. */
			printf("%.*s%s", ident * 2, tab, class__name(c));
			if (global_verbose)
				printf(": %u", n);
			putchar('\n');
			if (recursive)
				print_containers(pos, ident + 1);
		}
	}
}
/*
 * Command line options understood by pahole, in the order they are shown
 * by --help.  Each .key is the short option character that
 * pahole__options_parser() switches on; entries with an .arg take a
 * mandatory argument.  The table ends with an entry whose .name is NULL.
 */
static const struct argp_option pahole__options[] = {
	{
		.name = "bit_holes",
		.key  = 'B',
		.arg  = "NR_HOLES",
		.doc  = "show only structs with at least NR_HOLES bit holes",
	},
	{
		.name = "cacheline_size",
		.key  = 'c',
		.arg  = "SIZE",
		.doc  = "set cacheline size to SIZE",
	},
	{
		.name = "class_name",
		.key  = 'C',
		.arg  = "CLASS_NAME",
		.doc  = "show just this class",
	},
	{
		.name = "contains",
		.key  = 'i',
		.arg  = "CLASS_NAME",
		.doc  = "show classes that contain CLASS_NAME",
	},
	{
		.name = "holes",
		.key  = 'H',
		.arg  = "NR_HOLES",
		.doc  = "show only structs with at least NR_HOLES holes",
	},
	{
		.name = "hole_size_ge",
		.key  = 'z',
		.arg  = "HOLE_SIZE",
		.doc  = "show only structs with at least one hole greater "
			"or equal to HOLE_SIZE",
	},
	{
		.name = "packable",
		.key  = 'P',
		.doc  = "show only structs that have holes that can be packed",
	},
	{
		.name = "expand_types",
		.key  = 'E',
		.doc  = "expand class members",
	},
	{
		.name = "nr_members",
		.key  = 'n',
		.doc  = "show number of members",
	},
	{
		.name = "rel_offset",
		.key  = 'r',
		.doc  = "show relative offsets of members in inner structs",
	},
	{
		.name = "recursive",
		.key  = 'd',
		.doc  = "recursive mode, affects several other flags",
	},
	{
		.name = "reorganize",
		.key  = 'R',
		.doc  = "reorg struct trying to kill holes",
	},
	{
		.name = "show_reorg_steps",
		.key  = 'S',
		.doc  = "show the struct layout at each reorganization step",
	},
	{
		/* Was a copy of the --sizes text; this one is about names. */
		.name = "class_name_len",
		.key  = 'N',
		.doc  = "show length of class names",
	},
	{
		.name = "nr_methods",
		.key  = 'm',
		.doc  = "show number of methods",
	},
	{
		.name = "sizes",
		.key  = 's',
		.doc  = "show size of classes",
	},
	{
		.name = "separator",
		.key  = 't',
		.arg  = "SEP",
		.doc  = "use SEP as the field separator",
	},
	{
		.name = "nr_definitions",
		.key  = 'T',
		.doc  = "show how many times struct was defined",
	},
	{
		.name = "decl_exclude",
		.key  = 'D',
		.arg  = "PREFIX",
		.doc  = "exclude classes declared in files with PREFIX",
	},
	{
		.name = "exclude",
		.key  = 'x',
		.arg  = "PREFIX",
		.doc  = "exclude PREFIXed classes",
	},
	{
		.name = "cu_exclude",
		.key  = 'X',
		.arg  = "PREFIX",
		.doc  = "exclude PREFIXed compilation units",
	},
	{
		.name = "anon_include",
		.key  = 'a',
		.doc  = "include anonymous classes",
	},
	{
		.name = "nested_anon_include",
		.key  = 'A',
		.doc  = "include nested (inside other structs) anonymous classes",
	},
	{
		.name = "verbose",
		.key  = 'V',
		.doc  = "be verbose",
	},
	{
		/* Terminator: every remaining field is zero initialized. */
		.name = NULL,
	}
};
/*
 * Output routine for a struct structure.  Starts out as class_formatter and
 * is replaced in pahole__options_parser() when one of the -s, -n, -N, -m,
 * -T or -z options is seen.
 */
static void (*formatter)(const struct structure *s) = class_formatter;
static error_t pahole__options_parser(int key, char *arg,
[PAHOLE]: Use cus__loadfl, i.e. libdwfl Now we have: [acme@filo pahole]$ pahole --help Usage: pahole [OPTION...] [FILE] {[CLASS]} -a, --anon_include include anonymous classes -A, --nested_anon_include include nested (inside other structs) anonymous classes -B, --bit_holes=NR_HOLES Show only structs at least NR_HOLES bit holes -c, --cacheline_size=SIZE set cacheline size to SIZE -D, --decl_exclude=PREFIX exclude classes declared in files with PREFIX -E, --expand_types expand class members -H, --holes=NR_HOLES show only structs at least NR_HOLES holes -m, --nr_methods show number of methods -n, --nr_members show number of members -N, --class_name_len show size of classes -P, --packable show only structs that has holes that can be packed -R, --reorganize reorg struct trying to kill holes -s, --sizes show size of classes -S, --show_reorg_steps show the struct layout at each reorganization step -t, --nr_definitions show how many times struct was defined -V, --verbose be verbose -x, --exclude=PREFIX exclude PREFIXed classes -X, --cu_exclude=PREFIX exclude PREFIXed compilation units Input Selection: --debuginfo-path=PATH Search path for separate debuginfo files -e, --executable=FILE Find addresses in FILE -k, --kernel Find addresses in the running kernel -K, --offline-kernel[=RELEASE] Kernel with all modules -M, --linux-process-map=FILE Find addresses in files mapped as read from FILE in Linux /proc/PID/maps format -p, --pid=PID Find addresses in files mapped into process PID -?, --help Give this help list --usage Give a short usage message Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-03-30 18:25:51 +02:00
struct argp_state *state)
{
switch (key) {
[PAHOLE]: Use cus__loadfl, i.e. libdwfl Now we have: [acme@filo pahole]$ pahole --help Usage: pahole [OPTION...] [FILE] {[CLASS]} -a, --anon_include include anonymous classes -A, --nested_anon_include include nested (inside other structs) anonymous classes -B, --bit_holes=NR_HOLES Show only structs at least NR_HOLES bit holes -c, --cacheline_size=SIZE set cacheline size to SIZE -D, --decl_exclude=PREFIX exclude classes declared in files with PREFIX -E, --expand_types expand class members -H, --holes=NR_HOLES show only structs at least NR_HOLES holes -m, --nr_methods show number of methods -n, --nr_members show number of members -N, --class_name_len show size of classes -P, --packable show only structs that has holes that can be packed -R, --reorganize reorg struct trying to kill holes -s, --sizes show size of classes -S, --show_reorg_steps show the struct layout at each reorganization step -t, --nr_definitions show how many times struct was defined -V, --verbose be verbose -x, --exclude=PREFIX exclude PREFIXed classes -X, --cu_exclude=PREFIX exclude PREFIXed compilation units Input Selection: --debuginfo-path=PATH Search path for separate debuginfo files -e, --executable=FILE Find addresses in FILE -k, --kernel Find addresses in the running kernel -K, --offline-kernel[=RELEASE] Kernel with all modules -M, --linux-process-map=FILE Find addresses in files mapped as read from FILE in Linux /proc/PID/maps format -p, --pid=PID Find addresses in files mapped into process PID -?, --help Give this help list --usage Give a short usage message Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-03-30 18:25:51 +02:00
case ARGP_KEY_INIT: state->child_inputs[0] = state->input; break;
case 'c': cacheline_size = atoi(arg); break;
case 'C': class_name = arg; break;
[PAHOLE]: Introduce --recursive For now only affects the --contains output. Example showing the structs that include struct list_head in a linux kernel module: [acme@filo pahole]$ pahole --recursive --contains list_head examples/ipv6.ko.debug.x86-64 inet_protosw proto sock_iocb key_type msg_queue msg_msg nf_hook_ops softnet_data net_device softnet_data dma_device dma_client dma_chan class_device net_device softnet_data dma_chan class klist_node device_driver device klist device_driver bus_type device file_system_type nfs_lock_info file_lock block_device address_space inode dquot mem_dqinfo super_block inode signal_struct page kioctx file kiocb work_struct delayed_work kioctx timer_list ifmcaddr6 inet6_dev inet6_ifaddr neigh_table neighbour net_device softnet_data sock inet_sock delayed_work kioctx plist_head task_struct sigpending signal_struct task_struct user_struct device dev_pm_info device mutex_waiter mutex seq_file block_device quota_info super_block dquot super_block inode zone per_cpu_pages free_area kset bus_type subsystem class bus_type __wait_queue_head __wait_queue rw_semaphore quota_info super_block super_block inode key blocking_notifier_head bus_type subsystem class bus_type mm_struct dentry vm_area_struct kobject class_device net_device softnet_data dma_chan device_driver module_kobject module device kset bus_type subsystem class bus_type lock_class module mm_struct task_struct Handling in multi-cu objects is not very precise, as the same struct has different dwarf offsets (id) in each CU. A mitigation for this problem will be provided with the --cu_list and --cu_name upcoming options, where one will be able to get a list of the object files in a, for instance, linux kernel .ko module and also to specify a cu name to be the only to be considered when processing multi-cu files (again, such as .ko linux kernel modules). 
This ends up being also useful to generate a reverse class hierarchy :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 00:11:51 +02:00
case 'd': recursive = 1; break;
case 'i': find_containers = 1;
class_name = arg; break;
case 'H': nr_holes = atoi(arg); break;
[LIB]: Introduce class__has_hole_ge() That returns if the class has a hole greater or equal to the size specified. Pahole now has a --hole_size_ge command line option to use it. Example on a linux kernel built for x86_64 where we list the structs that have holes bigger than 32 bytes, that provides an approximation of structs with ____cacheline_aligned_in_smp annotated members: [acme@filo pahole]$ pahole --hole_size_ge 32 examples/vmlinux-x86_64 inet_hashinfo rcu_ctrlblk hh_cache net_device files_struct module zone For instance, look at struct zone clever use of such construct: _pad1_ is defined with ZONE_PADDING(_pad1_), that is: /* <40e> /home/acme/git/net-2.6.22/include/linux/mmzone.h:179 */ struct zone { long unsigned int pages_min; /* 0 8 */ long unsigned int pages_low; /* 8 8 */ long unsigned int pages_high; /* 16 8 */ long unsigned int lowmem_reserve[3]; /* 24 24 */ int node; /* 48 4 */ /* XXX 4 bytes hole, try to pack */ long unsigned int min_unmapped_pages; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ long unsigned int min_slab_pages; /* 64 8 */ struct per_cpu_pageset * pageset[255]; /* 72 2040 */ /* --- cacheline 33 boundary (2112 bytes) --- */ spinlock_t lock; /* 2112 4 */ /* XXX 4 bytes hole, try to pack */ struct free_area free_area[11]; /* 2120 264 */ /* XXX 48 bytes hole, try to pack */ /* --- cacheline 38 boundary (2432 bytes) --- */ struct zone_padding _pad1_; /* 2432 0 */ spinlock_t lru_lock; /* 2432 4 */ /* XXX 4 bytes hole, try to pack */ struct list_head active_list; /* 2440 16 */ struct list_head inactive_list; /* 2456 16 */ long unsigned int nr_scan_active; /* 2472 8 */ long unsigned int nr_scan_inactive; /* 2480 8 */ long unsigned int pages_scanned; /* 2488 8 */ /* --- cacheline 39 boundary (2496 bytes) --- */ int all_unreclaimable; /* 2496 4 */ atomic_t reclaim_in_progress; /* 2500 4 */ atomic_long_t vm_stat[20]; /* 2504 160 */ /* --- cacheline 41 boundary (2624 bytes) was 40 bytes ago --- */ int prev_priority; /* 2664 4 */ /* 
XXX 20 bytes hole, try to pack */ /* --- cacheline 42 boundary (2688 bytes) --- */ struct zone_padding _pad2_; /* 2688 0 */ wait_queue_head_t * wait_table; /* 2688 8 */ long unsigned int wait_table_hash_nr_entries; /* 2696 8 */ long unsigned int wait_table_bits; /* 2704 8 */ struct pglist_data * zone_pgdat; /* 2712 8 */ long unsigned int zone_start_pfn; /* 2720 8 */ long unsigned int spanned_pages; /* 2728 8 */ long unsigned int present_pages; /* 2736 8 */ const char * name; /* 2744 8 */ /* --- cacheline 43 boundary (2752 bytes) --- */ }; /* size: 2752, cachelines: 43 */ /* sum members: 2672, holes: 5, sum holes: 80 */ /* definitions: 933 */ Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 18:32:53 +02:00
case 'z':
hole_size_ge = atoi(arg);
if (!global_verbose)
formatter = class_name_formatter;
break;
case 'B': nr_bit_holes = atoi(arg); break;
case 'E': conf.expand_types = 1; break;
case 'r': conf.rel_offset = 1; break;
[PAHOLE]: Use cus__loadfl, i.e. libdwfl Now we have: [acme@filo pahole]$ pahole --help Usage: pahole [OPTION...] [FILE] {[CLASS]} -a, --anon_include include anonymous classes -A, --nested_anon_include include nested (inside other structs) anonymous classes -B, --bit_holes=NR_HOLES Show only structs at least NR_HOLES bit holes -c, --cacheline_size=SIZE set cacheline size to SIZE -D, --decl_exclude=PREFIX exclude classes declared in files with PREFIX -E, --expand_types expand class members -H, --holes=NR_HOLES show only structs at least NR_HOLES holes -m, --nr_methods show number of methods -n, --nr_members show number of members -N, --class_name_len show size of classes -P, --packable show only structs that has holes that can be packed -R, --reorganize reorg struct trying to kill holes -s, --sizes show size of classes -S, --show_reorg_steps show the struct layout at each reorganization step -t, --nr_definitions show how many times struct was defined -V, --verbose be verbose -x, --exclude=PREFIX exclude PREFIXed classes -X, --cu_exclude=PREFIX exclude PREFIXed compilation units Input Selection: --debuginfo-path=PATH Search path for separate debuginfo files -e, --executable=FILE Find addresses in FILE -k, --kernel Find addresses in the running kernel -K, --offline-kernel[=RELEASE] Kernel with all modules -M, --linux-process-map=FILE Find addresses in files mapped as read from FILE in Linux /proc/PID/maps format -p, --pid=PID Find addresses in files mapped into process PID -?, --help Give this help list --usage Give a short usage message Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-03-30 18:25:51 +02:00
case 'R': reorganize = 1; break;
case 'S': show_reorg_steps = 1; break;
case 's': formatter = size_formatter; break;
case 'n': formatter = nr_members_formatter; break;
case 'N': formatter = class_name_len_formatter; break;
case 'm': formatter = nr_methods_formatter; break;
[PAHOLE]: Use cus__loadfl, i.e. libdwfl Now we have: [acme@filo pahole]$ pahole --help Usage: pahole [OPTION...] [FILE] {[CLASS]} -a, --anon_include include anonymous classes -A, --nested_anon_include include nested (inside other structs) anonymous classes -B, --bit_holes=NR_HOLES Show only structs at least NR_HOLES bit holes -c, --cacheline_size=SIZE set cacheline size to SIZE -D, --decl_exclude=PREFIX exclude classes declared in files with PREFIX -E, --expand_types expand class members -H, --holes=NR_HOLES show only structs at least NR_HOLES holes -m, --nr_methods show number of methods -n, --nr_members show number of members -N, --class_name_len show size of classes -P, --packable show only structs that has holes that can be packed -R, --reorganize reorg struct trying to kill holes -s, --sizes show size of classes -S, --show_reorg_steps show the struct layout at each reorganization step -t, --nr_definitions show how many times struct was defined -V, --verbose be verbose -x, --exclude=PREFIX exclude PREFIXed classes -X, --cu_exclude=PREFIX exclude PREFIXed compilation units Input Selection: --debuginfo-path=PATH Search path for separate debuginfo files -e, --executable=FILE Find addresses in FILE -k, --kernel Find addresses in the running kernel -K, --offline-kernel[=RELEASE] Kernel with all modules -M, --linux-process-map=FILE Find addresses in files mapped as read from FILE in Linux /proc/PID/maps format -p, --pid=PID Find addresses in files mapped into process PID -?, --help Give this help list --usage Give a short usage message Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-03-30 18:25:51 +02:00
case 'P': show_packable = 1; break;
case 't': separator = arg[0]; break;
case 'T': formatter = nr_definitions_formatter; break;
case 'a': class__include_anonymous = 1; break;
case 'A': class__include_nested_anonymous = 1; break;
case 'D': decl_exclude_prefix = arg;
decl_exclude_prefix_len = strlen(decl_exclude_prefix);
break;
case 'x': class__exclude_prefix = arg;
class__exclude_prefix_len = strlen(class__exclude_prefix);
break;
case 'X': cu__exclude_prefix = arg;
cu__exclude_prefix_len = strlen(cu__exclude_prefix);
break;
case 'V': global_verbose = 1;
conf.show_decl_info = 1; break;
default:
return ARGP_ERR_UNKNOWN;
}
return 0;
}
/* Non-option arguments shown in the usage line: an optional input file. */
static const char pahole__args_doc[] = "[FILE]";
/*
 * argp description of the pahole command line: the option table, the
 * callback that handles each option and the usage text for positional
 * arguments.  main() passes it to cus__loadfl().
 */
static struct argp pahole__argp = {
.options = pahole__options,
.parser = pahole__options_parser,
.args_doc = pahole__args_doc,
};
int main(int argc, char *argv[])
{
struct cus *cus;
[PAHOLE]: Introduce --recursive For now only affects the --contains output. Example showing the structs that include struct list_head in a linux kernel module: [acme@filo pahole]$ pahole --recursive --contains list_head examples/ipv6.ko.debug.x86-64 inet_protosw proto sock_iocb key_type msg_queue msg_msg nf_hook_ops softnet_data net_device softnet_data dma_device dma_client dma_chan class_device net_device softnet_data dma_chan class klist_node device_driver device klist device_driver bus_type device file_system_type nfs_lock_info file_lock block_device address_space inode dquot mem_dqinfo super_block inode signal_struct page kioctx file kiocb work_struct delayed_work kioctx timer_list ifmcaddr6 inet6_dev inet6_ifaddr neigh_table neighbour net_device softnet_data sock inet_sock delayed_work kioctx plist_head task_struct sigpending signal_struct task_struct user_struct device dev_pm_info device mutex_waiter mutex seq_file block_device quota_info super_block dquot super_block inode zone per_cpu_pages free_area kset bus_type subsystem class bus_type __wait_queue_head __wait_queue rw_semaphore quota_info super_block super_block inode key blocking_notifier_head bus_type subsystem class bus_type mm_struct dentry vm_area_struct kobject class_device net_device softnet_data dma_chan device_driver module_kobject module device kset bus_type subsystem class bus_type lock_class module mm_struct task_struct Handling in multi-cu objects is not very precise, as the same struct has different dwarf offsets (id) in each CU. A mitigation for this problem will be provided with the --cu_list and --cu_name upcoming options, where one will be able to get a list of the object files in a, for instance, linux kernel .ko module and also to specify a cu name to be the only to be considered when processing multi-cu files (again, such as .ko linux kernel modules). 
This ends up being also useful to generate a reverse class hierarchy :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 00:11:51 +02:00
int err;
cus = cus__new(NULL, NULL);
if (cus == NULL) {
fputs("pahole: insufficient memory\n", stderr);
return EXIT_FAILURE;
}
err = cus__loadfl(cus, &pahole__argp, argc, argv);
[PAHOLE]: Use cus__loadfl, i.e. libdwfl Now we have: [acme@filo pahole]$ pahole --help Usage: pahole [OPTION...] [FILE] {[CLASS]} -a, --anon_include include anonymous classes -A, --nested_anon_include include nested (inside other structs) anonymous classes -B, --bit_holes=NR_HOLES Show only structs at least NR_HOLES bit holes -c, --cacheline_size=SIZE set cacheline size to SIZE -D, --decl_exclude=PREFIX exclude classes declared in files with PREFIX -E, --expand_types expand class members -H, --holes=NR_HOLES show only structs at least NR_HOLES holes -m, --nr_methods show number of methods -n, --nr_members show number of members -N, --class_name_len show size of classes -P, --packable show only structs that has holes that can be packed -R, --reorganize reorg struct trying to kill holes -s, --sizes show size of classes -S, --show_reorg_steps show the struct layout at each reorganization step -t, --nr_definitions show how many times struct was defined -V, --verbose be verbose -x, --exclude=PREFIX exclude PREFIXed classes -X, --cu_exclude=PREFIX exclude PREFIXed compilation units Input Selection: --debuginfo-path=PATH Search path for separate debuginfo files -e, --executable=FILE Find addresses in FILE -k, --kernel Find addresses in the running kernel -K, --offline-kernel[=RELEASE] Kernel with all modules -M, --linux-process-map=FILE Find addresses in files mapped as read from FILE in Linux /proc/PID/maps format -p, --pid=PID Find addresses in files mapped into process PID -?, --help Give this help list --usage Give a short usage message Mandatory or optional arguments to long options are also mandatory or optional for any corresponding short options. Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-03-30 18:25:51 +02:00
if (err != 0)
return EXIT_FAILURE;
dwarves__init(cacheline_size);
cus__for_each_cu(cus, cu_unique_iterator, NULL, cu__filter);
if (formatter == nr_methods_formatter)
cus__for_each_cu(cus, cu_nr_methods_iterator, NULL, cu__filter);
[PAHOLE]: Introduce --recursive For now only affects the --contains output. Example showing the structs that include struct list_head in a linux kernel module: [acme@filo pahole]$ pahole --recursive --contains list_head examples/ipv6.ko.debug.x86-64 inet_protosw proto sock_iocb key_type msg_queue msg_msg nf_hook_ops softnet_data net_device softnet_data dma_device dma_client dma_chan class_device net_device softnet_data dma_chan class klist_node device_driver device klist device_driver bus_type device file_system_type nfs_lock_info file_lock block_device address_space inode dquot mem_dqinfo super_block inode signal_struct page kioctx file kiocb work_struct delayed_work kioctx timer_list ifmcaddr6 inet6_dev inet6_ifaddr neigh_table neighbour net_device softnet_data sock inet_sock delayed_work kioctx plist_head task_struct sigpending signal_struct task_struct user_struct device dev_pm_info device mutex_waiter mutex seq_file block_device quota_info super_block dquot super_block inode zone per_cpu_pages free_area kset bus_type subsystem class bus_type __wait_queue_head __wait_queue rw_semaphore quota_info super_block super_block inode key blocking_notifier_head bus_type subsystem class bus_type mm_struct dentry vm_area_struct kobject class_device net_device softnet_data dma_chan device_driver module_kobject module device kset bus_type subsystem class bus_type lock_class module mm_struct task_struct Handling in multi-cu objects is not very precise, as the same struct has different dwarf offsets (id) in each CU. A mitigation for this problem will be provided with the --cu_list and --cu_name upcoming options, where one will be able to get a list of the object files in a, for instance, linux kernel .ko module and also to specify a cu name to be the only to be considered when processing multi-cu files (again, such as .ko linux kernel modules). 
This ends up being also useful to generate a reverse class hierarchy :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 00:11:51 +02:00
memset(tab, ' ', sizeof(tab) - 1);
if (class_name != NULL) {
struct structure *s = structures__find(class_name);
if (s == NULL) {
fprintf(stderr, "struct %s not found!\n", class_name);
return EXIT_FAILURE;
}
if (reorganize) {
size_t savings;
const uint8_t reorg_verbose =
show_reorg_steps ? 2 : global_verbose;
struct class *clone = class__clone(s->class, NULL);
if (clone == NULL) {
fprintf(stderr, "pahole: out of memory!\n");
return EXIT_FAILURE;
}
class__reorganize(clone, s->cu, reorg_verbose, stdout);
savings = class__size(s->class) - class__size(clone);
if (savings != 0 && reorg_verbose) {
putchar('\n');
if (show_reorg_steps)
puts("/* Final reorganized struct: */");
}
tag__fprintf(class__tag(clone), s->cu, &conf, stdout);
if (savings != 0) {
const size_t cacheline_savings =
(tag__nr_cachelines(class__tag(s->class),
s->cu) -
tag__nr_cachelines(class__tag(clone),
s->cu));
printf(" /* saved %zd byte%s", savings,
savings != 1 ? "s" : "");
if (cacheline_savings != 0)
printf(" and %zu cacheline%s",
cacheline_savings,
cacheline_savings != 1 ?
"s" : "");
puts("! */");
}
} else if (find_containers)
[PAHOLE]: Introduce --recursive For now only affects the --contains output. Example showing the structs that include struct list_head in a linux kernel module: [acme@filo pahole]$ pahole --recursive --contains list_head examples/ipv6.ko.debug.x86-64 inet_protosw proto sock_iocb key_type msg_queue msg_msg nf_hook_ops softnet_data net_device softnet_data dma_device dma_client dma_chan class_device net_device softnet_data dma_chan class klist_node device_driver device klist device_driver bus_type device file_system_type nfs_lock_info file_lock block_device address_space inode dquot mem_dqinfo super_block inode signal_struct page kioctx file kiocb work_struct delayed_work kioctx timer_list ifmcaddr6 inet6_dev inet6_ifaddr neigh_table neighbour net_device softnet_data sock inet_sock delayed_work kioctx plist_head task_struct sigpending signal_struct task_struct user_struct device dev_pm_info device mutex_waiter mutex seq_file block_device quota_info super_block dquot super_block inode zone per_cpu_pages free_area kset bus_type subsystem class bus_type __wait_queue_head __wait_queue rw_semaphore quota_info super_block super_block inode key blocking_notifier_head bus_type subsystem class bus_type mm_struct dentry vm_area_struct kobject class_device net_device softnet_data dma_chan device_driver module_kobject module device kset bus_type subsystem class bus_type lock_class module mm_struct task_struct Handling in multi-cu objects is not very precise, as the same struct has different dwarf offsets (id) in each CU. A mitigation for this problem will be provided with the --cu_list and --cu_name upcoming options, where one will be able to get a list of the object files in a, for instance, linux kernel .ko module and also to specify a cu name to be the only to be considered when processing multi-cu files (again, such as .ko linux kernel modules). 
This also ends up being useful for generating a reverse class hierarchy :-) Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
2007-05-11 00:11:51 +02:00
print_containers(s, 0);
else
tag__fprintf(class__tag(s->class), s->cu, &conf, stdout);
} else
print_classes(formatter);
return EXIT_SUCCESS;
}