92a4118812
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
210 lines
7.7 KiB
Plaintext
210 lines
7.7 KiB
Plaintext
Here is a summary of changes for the 1.13 version of pahole and its friends:
|
|
|
|
- BTF
|
|
|
|
- Use of the recently introduced BTF deduplication algorithm present in the
|
|
Linux kernel's libbpf library, which allows for all the types in a multi
|
|
compile unit binary such as vmlinux to be compactly stored, without duplicates.
|
|
|
|
E.g.: from roughly:
|
|
|
|
$ readelf -SW ../build/v5.1-rc4+/vmlinux | grep .debug_info.*PROGBITS
|
|
[63] .debug_info PROGBITS 0000000000000000 1d80be0 c3c18b9 00 0 0 1
|
|
$
|
|
195 MiB
|
|
|
|
to:
|
|
|
|
$ time pahole --btf_encode ../build/v5.1-rc4+/vmlinux
|
|
real 0m19.168s
|
|
user 0m17.707s # On a Lenovo t480s (i7-8650U) SSD
|
|
sys 0m1.337s
|
|
$
|
|
|
|
$ readelf -SW ../build/v5.1-rc4+/vmlinux | grep .BTF.*PROGBITS
|
|
[78] .BTF PROGBITS 0000000000000000 27b49f61 1e23c3 00 0 0 1
|
|
$
|
|
~2 MiB
|
|
|
|
- Introduce a 'btfdiff' utility that prints the output from DWARF and from
|
|
BTF, comparing the pretty printed outputs, running it on various linux
|
|
kernel images, such as an allyesconfig for ppc64.
|
|
|
|
Running it on the above 5.1-rc4+ vmlinux:
|
|
|
|
$ btfdiff ../build/v5.1-rc4+/vmlinux
|
|
$
|
|
|
|
No differences from the types generated from the DWARF ELF sections to the
|
|
ones generated from the BTF ELF section.
|
|
|
|
- Add a BTF loader, i.e. 'pahole -F btf' allows pretty printing of structs
|
|
and unions in the same fashion as with DWARF info, and since BTF is way
|
|
more compact, using it is much faster than using DWARF.
|
|
|
|
$ cat ../build/v5.1-rc4+/vmlinux > /dev/null
|
|
$ perf stat -e cycles pahole -F btf ../build/v5.1-rc4+/vmlinux > /dev/null
|
|
|
|
Performance counter stats for 'pahole -F btf ../build/v5.1-rc4+/vmlinux':
|
|
|
|
229,712,692 cycles:u
|
|
0.063379597 seconds time elapsed
|
|
0.056265000 seconds user
|
|
0.006911000 seconds sys
|
|
|
|
$ perf stat -e cycles pahole -F dwarf ../build/v5.1-rc4+/vmlinux > /dev/null
|
|
|
|
Performance counter stats for 'pahole -F dwarf ../build/v5.1-rc4+/vmlinux':
|
|
|
|
49,579,679,466 cycles:u
|
|
13.063487352 seconds time elapsed
|
|
12.612512000 seconds user
|
|
0.426226000 seconds sys
|
|
$
|
|
|
|
- Better union support:
|
|
|
|
- Allow unions to be specified in pahole in the same fashion as structs
|
|
|
|
$ pahole -C thread_union ../build/v5.1-rc4+/net/ipv4/tcp.o
|
|
union thread_union {
|
|
struct task_struct task __attribute__((__aligned__(64))); /* 0 11008 */
|
|
long unsigned int stack[2048]; /* 0 16384 */
|
|
};
|
|
$
|
|
|
|
- Infer __attribute__((__packed__)) when structs have no alignment holes
|
|
and violate basic types (integer, longs, short integer) natural alignment
|
|
requirements. Several heuristics are used to infer the __packed__
|
|
attribute, see the changeset log for descriptions.
|
|
|
|
$ pahole -F btf -C boot_e820_entry ../build/v5.1-rc4+/vmlinux
|
|
struct boot_e820_entry {
|
|
__u64 addr; /* 0 8 */
|
|
__u64 size; /* 8 8 */
|
|
__u32 type; /* 16 4 */
|
|
|
|
/* size: 20, cachelines: 1, members: 3 */
|
|
/* last cacheline: 20 bytes */
|
|
} __attribute__((__packed__));
|
|
$
|
|
|
|
$ pahole -F btf -C lzma_header ../build/v5.1-rc4+/vmlinux
|
|
struct lzma_header {
|
|
uint8_t pos; /* 0 1 */
|
|
uint32_t dict_size; /* 1 4 */
|
|
uint64_t dst_size; /* 5 8 */
|
|
|
|
/* size: 13, cachelines: 1, members: 3 */
|
|
/* last cacheline: 13 bytes */
|
|
} __attribute__((__packed__));
|
|
|
|
- Support DWARF5's DW_AT_alignment, which, together with the __packed__
|
|
attribute inference algorithms, produces output that, when compiled, should
|
|
produce structures with layouts that match the original source code.
|
|
|
|
See it in action with 'struct task_struct', which will also show some of the
|
|
new information at the struct summary, at the end of the struct:
|
|
|
|
$ pahole -C task_struct ../build/v5.1-rc4+/vmlinux | tail -19
|
|
/* --- cacheline 103 boundary (6592 bytes) --- */
|
|
struct vm_struct * stack_vm_area; /* 6592 8 */
|
|
refcount_t stack_refcount; /* 6600 4 */
|
|
|
|
/* XXX 4 bytes hole, try to pack */
|
|
|
|
void * security; /* 6608 8 */
|
|
|
|
/* XXX 40 bytes hole, try to pack */
|
|
|
|
/* --- cacheline 104 boundary (6656 bytes) --- */
|
|
struct thread_struct thread __attribute__((__aligned__(64))); /* 6656 4352 */
|
|
|
|
/* size: 11008, cachelines: 172, members: 207 */
|
|
/* sum members: 10902, holes: 16, sum holes: 98 */
|
|
/* sum bitfield members: 10 bits, bit holes: 2, sum bit holes: 54 bits */
|
|
/* paddings: 3, sum paddings: 14 */
|
|
/* forced alignments: 6, forced holes: 1, sum forced holes: 40 */
|
|
} __attribute__((__aligned__(64)));
|
|
$
|
|
|
|
- Add a '--compile' option to 'pfunct' that produces compilable output for the
|
|
function prototypes in an object file. There are still some bugs but the vast
|
|
majority of the kernel single compilation unit files, i.e. the ones produced from a
|
|
single .c file, are working; see the new 'fullcircle' utility that uses this
|
|
feature.
|
|
|
|
Example of it in action:
|
|
|
|
$ pfunct --compile=static_key_false ../build/v5.1-rc4+/net/ipv4/tcp.o
|
|
typedef _Bool bool;
|
|
typedef struct {
|
|
int counter; /* 0 4 */
|
|
|
|
/* size: 4, cachelines: 1, members: 1 */
|
|
/* last cacheline: 4 bytes */
|
|
} atomic_t;
|
|
|
|
struct jump_entry;
|
|
|
|
struct static_key_mod;
|
|
|
|
|
|
struct static_key {
|
|
atomic_t enabled; /* 0 4 */
|
|
|
|
/* XXX 4 bytes hole, try to pack */
|
|
|
|
union {
|
|
long unsigned int type; /* 8 8 */
|
|
struct jump_entry * entries; /* 8 8 */
|
|
struct static_key_mod * next; /* 8 8 */
|
|
}; /* 8 8 */
|
|
|
|
/* size: 16, cachelines: 1, members: 2 */
|
|
/* sum members: 12, holes: 1, sum holes: 4 */
|
|
/* last cacheline: 16 bytes */
|
|
};
|
|
|
|
bool static_key_false(struct static_key * key)
|
|
{
|
|
return *(bool *)1;
|
|
}
|
|
|
|
$
|
|
|
|
The generation of compilable code from the type information and its use in the
|
|
new tool 'fullcircle' helps validate all the parts of this codebase, finding
|
|
bugs that were lurking forever, go read the csets to find all sorts of curious
|
|
C language features that are rarely seen, like unnamed zero sized bitfields and
|
|
the way people have been using them over the years in a codebase like the linux
|
|
kernel.
|
|
|
|
Certainly there are several other features, changes and fixes that I forgot to
|
|
mention! Now lemme release this version so that we can use it more extensively
|
|
together with a recent patch merged for 5.2:
|
|
|
|
[PATCH bpf-next] kbuild: add ability to generate BTF type info for vmlinux
|
|
|
|
With it BTF will always be available for all the types of the kernel, which will
|
|
open a Pandora's box of cool new features that are in the works, and, for people
|
|
already using pahole, will greatly speed up its usage.
|
|
|
|
Please try to alias it to use btf, i.e.
|
|
|
|
alias pahole='pahole -F btf'
|
|
|
|
Please report any problems you may find with this new version or with the BTF
|
|
loader or any errors in the layout generated/pretty printed.
|
|
|
|
Thanks to the fine BTF guys at Facebook for the patches and help in testing,
|
|
fixing bugs and getting this out of the door, the stats for this release are:
|
|
|
|
Changesets: 157
|
|
|
|
113 Arnaldo Carvalho de Melo Red Hat
|
|
32 Andrii Nakryiko Facebook
|
|
10 Yonghong Song Facebook
|
|
1 Martin Lau Facebook
|
|
1 Domenico Andreoli
|