29fce8dc85
Switch strings container to using struct btf and its btf__add_str()/btf__find_str() APIs, which do equivalent internal string deduplication. This turns out to be a very significantly faster than using tsearch functions. To satisfy CTF encoding use case, some hacky string size fetching approach is utilized, as libbpf doesn't provide direct API to get total string section size and to copy over just strings data section. BEFORE: 22,624.28 msec task-clock # 1.000 CPUs utilized 85 context-switches # 0.004 K/sec 3 cpu-migrations # 0.000 K/sec 622,545 page-faults # 0.028 M/sec 68,177,206,387 cycles # 3.013 GHz (24.99%) 114,370,031,619 instructions # 1.68 insn per cycle (25.01%) 26,125,001,179 branches # 1154.733 M/sec (25.01%) 458,861,243 branch-misses # 1.76% of all branches (25.00%) 24,533,455,967 L1-dcache-loads # 1084.386 M/sec (25.02%) 973,500,214 L1-dcache-load-misses # 3.97% of all L1-dcache hits (25.05%) 338,773,561 LLC-loads # 14.974 M/sec (25.02%) 12,651,196 LLC-load-misses # 3.73% of all LL-cache hits (25.00%) 22.628910615 seconds time elapsed 21.341063000 seconds user 1.283763000 seconds sys AFTER: 18,362.97 msec task-clock # 1.000 CPUs utilized 37 context-switches # 0.002 K/sec 0 cpu-migrations # 0.000 K/sec 626,281 page-faults # 0.034 M/sec 52,480,619,000 cycles # 2.858 GHz (25.00%) 104,736,434,384 instructions # 2.00 insn per cycle (25.01%) 23,878,428,465 branches # 1300.358 M/sec (25.01%) 252,669,685 branch-misses # 1.06% of all branches (25.03%) 21,829,390,952 L1-dcache-loads # 1188.772 M/sec (25.04%) 638,086,339 L1-dcache-load-misses # 2.92% of all L1-dcache hits (25.02%) 212,327,435 LLC-loads # 11.563 M/sec (25.00%) 14,578,117 LLC-load-misses # 6.87% of all LL-cache hits (25.00%) 18.364427347 seconds time elapsed 16.985494000 seconds user 1.377959000 seconds sys Committer testing: Before: $ perf stat -r5 pahole -J vmlinux Performance counter stats for 'pahole -J vmlinux' (5 runs): 8,735.92 msec task-clock:u # 0.998 CPUs utilized ( +- 0.34% ) 0 
context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 353,978 page-faults:u # 0.041 M/sec ( +- 0.00% ) 34,722,167,335 cycles:u # 3.975 GHz ( +- 0.12% ) (83.33%) 555,981,118 stalled-cycles-frontend:u # 1.60% frontend cycles idle ( +- 1.53% ) (83.33%) 5,215,370,531 stalled-cycles-backend:u # 15.02% backend cycles idle ( +- 1.31% ) (83.33%) 72,615,773,119 instructions:u # 2.09 insn per cycle # 0.07 stalled cycles per insn ( +- 0.02% ) (83.34%) 16,624,959,121 branches:u # 1903.057 M/sec ( +- 0.01% ) (83.33%) 229,962,327 branch-misses:u # 1.38% of all branches ( +- 0.07% ) (83.33%) 8.7503 +- 0.0301 seconds time elapsed ( +- 0.34% ) $ After: $ perf stat -r5 pahole -J vmlinux Performance counter stats for 'pahole -J vmlinux' (5 runs): 7,302.31 msec task-clock:u # 0.998 CPUs utilized ( +- 1.16% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 355,884 page-faults:u # 0.049 M/sec ( +- 0.00% ) 29,150,861,078 cycles:u # 3.992 GHz ( +- 0.35% ) (83.33%) 478,705,326 stalled-cycles-frontend:u # 1.64% frontend cycles idle ( +- 2.70% ) (83.33%) 5,351,001,796 stalled-cycles-backend:u # 18.36% backend cycles idle ( +- 1.20% ) (83.33%) 65,835,888,022 instructions:u # 2.26 insn per cycle # 0.08 stalled cycles per insn ( +- 0.03% ) (83.33%) 15,025,195,460 branches:u # 2057.594 M/sec ( +- 0.05% ) (83.34%) 141,209,214 branch-misses:u # 0.94% of all branches ( +- 0.15% ) (83.33%) 7.3140 +- 0.0851 seconds time elapsed ( +- 1.16% ) $ 16.04% less cycles, keep the patches coming! :-) Had to add this patch tho: +++ b/dwarf_loader.c @@ -2159,7 +2159,7 @@ static unsigned long long dwarf_tag__orig_id(const struct tag *tag, static const char *dwarf__strings_ptr(const struct cu *cu __unused, strings_t s) { - return strings__ptr(strings, s); + return s ? 
strings__ptr(strings, s) : NULL; } To keep preexisting behaviour and to do what the BTF specific strings_ptr method does: static const char *btf_elf__strings_ptr(const struct cu *cu, strings_t s) { return btf_elf__string(cu->priv, s); } const char *btf_elf__string(struct btf_elf *btfe, uint32_t ref) { const char *s = btf__str_by_offset(btfe->btf, ref); return s && s[0] == '\0' ? NULL : s; } With these adjustments, btfdiff on a vmlinux with BTF and DWARF is again clean, i.e. pretty printing from BTF matches what we get when using DWARF. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: bpf@vger.kernel.org Cc: dwarves@vger.kernel.org Cc: kernel-team@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
354 lines
8.6 KiB
C
354 lines
8.6 KiB
C
/*
|
|
SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
Copyright (C) 2009 Red Hat Inc.
|
|
Copyright (C) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
|
|
*/
|
|
|
|
#include "dwarves.h"
|
|
#include "libctf.h"
|
|
#include "ctf.h"
|
|
#include "hash.h"
|
|
#include "elf_symtab.h"
|
|
#include <inttypes.h>
|
|
|
|
static int tag__check_id_drift(const struct tag *tag,
|
|
uint32_t core_id, uint32_t ctf_id)
|
|
{
|
|
if (ctf_id != core_id) {
|
|
fprintf(stderr, "%s: %s id drift, core: %u, libctf: %d\n",
|
|
__func__, dwarf_tag_name(tag->tag), core_id, ctf_id);
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int dwarf_to_ctf_type(uint16_t tag)
|
|
{
|
|
switch (tag) {
|
|
case DW_TAG_const_type: return CTF_TYPE_KIND_CONST;
|
|
case DW_TAG_pointer_type: return CTF_TYPE_KIND_PTR;
|
|
case DW_TAG_restrict_type: return CTF_TYPE_KIND_RESTRICT;
|
|
case DW_TAG_volatile_type: return CTF_TYPE_KIND_VOLATILE;
|
|
case DW_TAG_class_type:
|
|
case DW_TAG_structure_type: return CTF_TYPE_KIND_STR;
|
|
case DW_TAG_union_type: return CTF_TYPE_KIND_UNION;
|
|
}
|
|
return 0xffff;
|
|
}
|
|
|
|
static int base_type__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
struct base_type *bt = tag__base_type(tag);
|
|
uint32_t ctf_id = ctf__add_base_type(ctf, bt->name, bt->bit_size);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int pointer_type__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
uint32_t ctf_id = ctf__add_short_type(ctf, dwarf_to_ctf_type(tag->tag), tag->type, 0);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int typedef__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
uint32_t ctf_id = ctf__add_short_type(ctf, CTF_TYPE_KIND_TYPDEF, tag->type, tag__namespace(tag)->name);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int fwd_decl__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
uint32_t ctf_id = ctf__add_fwd_decl(ctf, tag__namespace(tag)->name);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int structure_type__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
struct type *type = tag__type(tag);
|
|
int64_t position;
|
|
uint32_t ctf_id = ctf__add_struct(ctf, dwarf_to_ctf_type(tag->tag),
|
|
type->namespace.name, type->size,
|
|
type->nr_members, &position);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
const bool is_short = type->size < CTF_SHORT_MEMBER_LIMIT;
|
|
struct class_member *pos;
|
|
type__for_each_data_member(type, pos) {
|
|
if (is_short)
|
|
ctf__add_short_member(ctf, pos->name, pos->tag.type,
|
|
pos->bit_offset, &position);
|
|
else
|
|
ctf__add_full_member(ctf, pos->name, pos->tag.type,
|
|
pos->bit_offset, &position);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static uint32_t array_type__nelems(struct tag *tag)
|
|
{
|
|
int i;
|
|
uint32_t nelem = 1;
|
|
struct array_type *array = tag__array_type(tag);
|
|
|
|
for (i = array->dimensions - 1; i >= 0; --i)
|
|
nelem *= array->nr_entries[i];
|
|
|
|
return nelem;
|
|
}
|
|
|
|
static int array_type__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
const uint32_t nelems = array_type__nelems(tag);
|
|
uint32_t ctf_id = ctf__add_array(ctf, tag->type, 0, nelems);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int subroutine_type__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
struct parameter *pos;
|
|
int64_t position;
|
|
struct ftype *ftype = tag__ftype(tag);
|
|
uint32_t ctf_id = ctf__add_function_type(ctf, tag->type, ftype->nr_parms, ftype->unspec_parms, &position);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
ftype__for_each_parameter(ftype, pos)
|
|
ctf__add_parameter(ctf, pos->tag.type, &position);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int enumeration_type__encode(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
struct type *etype = tag__type(tag);
|
|
int64_t position;
|
|
uint32_t ctf_id = ctf__add_enumeration_type(ctf, etype->namespace.name,
|
|
etype->size, etype->nr_members,
|
|
&position);
|
|
|
|
if (tag__check_id_drift(tag, core_id, ctf_id))
|
|
return -1;
|
|
|
|
struct enumerator *pos;
|
|
type__for_each_enumerator(etype, pos)
|
|
ctf__add_enumerator(ctf, pos->name, pos->value, &position);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void tag__encode_ctf(struct tag *tag, uint32_t core_id, struct ctf *ctf)
|
|
{
|
|
switch (tag->tag) {
|
|
case DW_TAG_base_type:
|
|
base_type__encode(tag, core_id, ctf);
|
|
break;
|
|
case DW_TAG_const_type:
|
|
case DW_TAG_pointer_type:
|
|
case DW_TAG_restrict_type:
|
|
case DW_TAG_volatile_type:
|
|
pointer_type__encode(tag, core_id, ctf);
|
|
break;
|
|
case DW_TAG_typedef:
|
|
typedef__encode(tag, core_id, ctf);
|
|
break;
|
|
case DW_TAG_structure_type:
|
|
case DW_TAG_union_type:
|
|
case DW_TAG_class_type:
|
|
if (tag__type(tag)->declaration)
|
|
fwd_decl__encode(tag, core_id, ctf);
|
|
else
|
|
structure_type__encode(tag, core_id, ctf);
|
|
break;
|
|
case DW_TAG_array_type:
|
|
array_type__encode(tag, core_id, ctf);
|
|
break;
|
|
case DW_TAG_subroutine_type:
|
|
subroutine_type__encode(tag, core_id, ctf);
|
|
break;
|
|
case DW_TAG_enumeration_type:
|
|
enumeration_type__encode(tag, core_id, ctf);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Number of hash bits used for the address hash tables in this file. */
#define HASHADDR__BITS 8
|
|
/* Number of buckets in an address hash table: 2^HASHADDR__BITS. */
#define HASHADDR__SIZE (1UL << HASHADDR__BITS)
|
|
/* Bucket index for an address key, computed with hash_64(). */
#define hashaddr__fn(key) hash_64(key, HASHADDR__BITS)
|
|
|
|
static struct function *hashaddr__find_function(const struct hlist_head hashtable[],
|
|
const uint64_t addr)
|
|
{
|
|
struct function *function;
|
|
struct hlist_node *pos;
|
|
uint16_t bucket = hashaddr__fn(addr);
|
|
const struct hlist_head *head = &hashtable[bucket];
|
|
|
|
hlist_for_each_entry(function, pos, head, tool_hnode) {
|
|
if (function->lexblock.ip.addr == addr)
|
|
return function;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct variable *hashaddr__find_variable(const struct hlist_head hashtable[],
|
|
const uint64_t addr)
|
|
{
|
|
struct variable *variable;
|
|
struct hlist_node *pos;
|
|
uint16_t bucket = hashaddr__fn(addr);
|
|
const struct hlist_head *head = &hashtable[bucket];
|
|
|
|
hlist_for_each_entry(variable, pos, head, tool_hnode) {
|
|
if (variable->ip.addr == addr)
|
|
return variable;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* FIXME: It's in the DWARF loader, we have to find a better handoff
|
|
* mechanism...
*
* This string table is what cu__encode_ctf() passes to ctf__set_strings().
|
|
*/
|
|
extern struct strings *strings;
|
|
|
|
int cu__encode_ctf(struct cu *cu, int verbose)
|
|
{
|
|
int err = -1;
|
|
struct ctf *ctf = ctf__new(cu->filename, cu->elf);
|
|
|
|
if (ctf == NULL)
|
|
goto out;
|
|
|
|
if (cu__cache_symtab(cu) < 0)
|
|
goto out_delete;
|
|
|
|
ctf__set_strings(ctf, strings);
|
|
|
|
uint32_t id;
|
|
struct tag *pos;
|
|
cu__for_each_type(cu, id, pos)
|
|
tag__encode_ctf(pos, id, ctf);
|
|
|
|
struct hlist_head hash_addr[HASHADDR__SIZE];
|
|
|
|
for (id = 0; id < HASHADDR__SIZE; ++id)
|
|
INIT_HLIST_HEAD(&hash_addr[id]);
|
|
|
|
struct function *function;
|
|
cu__for_each_function(cu, id, function) {
|
|
uint64_t addr = function->lexblock.ip.addr;
|
|
struct hlist_head *head = &hash_addr[hashaddr__fn(addr)];
|
|
hlist_add_head(&function->tool_hnode, head);
|
|
}
|
|
|
|
uint64_t addr;
|
|
GElf_Sym sym;
|
|
const char *sym_name;
|
|
cu__for_each_cached_symtab_entry(cu, id, sym, sym_name) {
|
|
if (ctf__ignore_symtab_function(&sym, sym_name))
|
|
continue;
|
|
|
|
addr = elf_sym__value(&sym);
|
|
int64_t position;
|
|
function = hashaddr__find_function(hash_addr, addr);
|
|
if (function == NULL) {
|
|
if (verbose)
|
|
fprintf(stderr,
|
|
"function %4d: %-20s %#" PRIx64 " %5u NOT FOUND!\n",
|
|
id, sym_name, addr,
|
|
elf_sym__size(&sym));
|
|
err = ctf__add_function(ctf, 0, 0, 0, &position);
|
|
if (err != 0)
|
|
goto out_err_ctf;
|
|
continue;
|
|
}
|
|
|
|
const struct ftype *ftype = &function->proto;
|
|
err = ctf__add_function(ctf, function->proto.tag.type,
|
|
ftype->nr_parms,
|
|
ftype->unspec_parms, &position);
|
|
|
|
if (err != 0)
|
|
goto out_err_ctf;
|
|
|
|
struct parameter *pos;
|
|
ftype__for_each_parameter(ftype, pos)
|
|
ctf__add_function_parameter(ctf, pos->tag.type, &position);
|
|
}
|
|
|
|
for (id = 0; id < HASHADDR__SIZE; ++id)
|
|
INIT_HLIST_HEAD(&hash_addr[id]);
|
|
|
|
struct variable *var;
|
|
cu__for_each_variable(cu, id, pos) {
|
|
var = tag__variable(pos);
|
|
if (variable__scope(var) != VSCOPE_GLOBAL)
|
|
continue;
|
|
struct hlist_head *head = &hash_addr[hashaddr__fn(var->ip.addr)];
|
|
hlist_add_head(&var->tool_hnode, head);
|
|
}
|
|
|
|
cu__for_each_cached_symtab_entry(cu, id, sym, sym_name) {
|
|
if (ctf__ignore_symtab_object(&sym, sym_name))
|
|
continue;
|
|
addr = elf_sym__value(&sym);
|
|
|
|
var = hashaddr__find_variable(hash_addr, addr);
|
|
if (var == NULL) {
|
|
if (verbose)
|
|
fprintf(stderr,
|
|
"variable %4d: %-20s %#" PRIx64 " %5u NOT FOUND!\n",
|
|
id, sym_name, addr,
|
|
elf_sym__size(&sym));
|
|
err = ctf__add_object(ctf, 0);
|
|
if (err != 0)
|
|
goto out_err_ctf;
|
|
continue;
|
|
}
|
|
|
|
err = ctf__add_object(ctf, var->ip.tag.type);
|
|
if (err != 0)
|
|
goto out_err_ctf;
|
|
}
|
|
|
|
ctf__encode(ctf, CTF_FLAGS_COMPR);
|
|
|
|
err = 0;
|
|
out_delete:
|
|
ctf__delete(ctf);
|
|
out:
|
|
return err;
|
|
out_err_ctf:
|
|
fprintf(stderr,
|
|
"%4d: %-20s %#llx %5u failed encoding, "
|
|
"ABORTING!\n", id, sym_name,
|
|
(unsigned long long)addr, elf_sym__size(&sym));
|
|
goto out_delete;
|
|
}
|