btf_encoder: revamp how per-CPU variables are encoded
Right now to encode per-CPU variables in BTF, pahole iterates complete vmlinux symbol table for each CU. There are 2500 CUs for a typical kernel image. Overall, to encode 287 per-CPU variables pahole spends more than 10% of its CPU budget, this is incredibly wasteful. This patch revamps how this is done. Now it pre-processes symbol table once before any of per-CU processing starts. It remembers each per-CPU variable symbol, including its address, size, and name. Then during processing each CU, binary search is used to correlate DWARF variable with per-CPU symbols and figure out if variable belongs to per-CPU data section. If the match is found, BTF_KIND_VAR is emitted and var_secinfo is recorded, just like before. At the very end, after all CUs are processed, BTF_KIND_DATASEC is emitted with sorted variables. This change makes per-CPU variables generation overhead pretty negligible and returns back about 10% of CPU usage. Performance counter stats for './pahole -J /home/andriin/linux-build/default/vmlinux': BEFORE: 19.160149000 seconds user 1.304873000 seconds sys 24,114.05 msec task-clock # 0.999 CPUs utilized 83 context-switches # 0.003 K/sec 0 cpu-migrations # 0.000 K/sec 622,417 page-faults # 0.026 M/sec 72,897,315,125 cycles # 3.023 GHz (25.02%) 127,807,316,959 instructions # 1.75 insn per cycle (25.01%) 29,087,179,117 branches # 1206.234 M/sec (25.01%) 464,105,921 branch-misses # 1.60% of all branches (25.01%) 30,252,119,368 L1-dcache-loads # 1254.543 M/sec (25.01%) 1,156,336,207 L1-dcache-load-misses # 3.82% of all L1-dcache hits (25.05%) 343,373,503 LLC-loads # 14.240 M/sec (25.02%) 12,044,977 LLC-load-misses # 3.51% of all LL-cache hits (25.01%) 24.136198321 seconds time elapsed 22.729693000 seconds user 1.384859000 seconds sys AFTER: 16.781455000 seconds user 1.343956000 seconds sys 23,398.77 msec task-clock # 1.000 CPUs utilized 86 context-switches # 0.004 K/sec 0 cpu-migrations # 0.000 K/sec 622,420 page-faults # 0.027 M/sec 68,395,641,468 cycles # 2.923 GHz (25.05%) 114,241,327,034 instructions # 1.67 insn per cycle (25.01%) 26,330,711,718 branches # 1125.303 M/sec (25.01%) 465,926,869 branch-misses # 1.77% of all branches (25.00%) 24,662,984,772 L1-dcache-loads # 1054.029 M/sec (25.00%) 1,054,052,064 L1-dcache-load-misses # 4.27% of all L1-dcache hits (25.00%) 340,970,622 LLC-loads # 14.572 M/sec (25.00%) 16,032,297 LLC-load-misses # 4.70% of all LL-cache hits (25.03%) 23.402259654 seconds time elapsed 21.916437000 seconds user 1.482826000 seconds sys Committer testing: $ grep 'model name' -m1 /proc/cpuinfo model name : AMD Ryzen 9 3900X 12-Core Processor $ Before: $ perf stat -r5 pahole -J vmlinux Performance counter stats for 'pahole -J vmlinux' (5 runs): 9,730.28 msec task-clock:u # 0.998 CPUs utilized ( +- 0.54% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 353,854 page-faults:u # 0.036 M/sec ( +- 0.00% ) 39,721,726,459 cycles:u # 4.082 GHz ( +- 0.07% ) (83.33%) 626,010,654 stalled-cycles-frontend:u # 1.58% frontend cycles idle ( +- 0.91% ) (83.33%) 7,518,333,691 stalled-cycles-backend:u # 18.93% backend cycles idle ( +- 0.56% ) (83.33%) 85,477,123,093 instructions:u # 2.15 insn per cycle # 0.09 stalled cycles per insn ( +- 0.02% ) (83.34%) 19,346,085,683 branches:u # 1988.235 M/sec ( +- 0.03% ) (83.34%) 237,291,787 branch-misses:u # 1.23% of all branches ( +- 0.15% ) (83.33%) 9.7465 +- 0.0524 seconds time elapsed ( +- 0.54% ) $ After: $ perf stat -r5 pahole -J vmlinux Performance counter stats for 'pahole -J vmlinux' (5 runs): 8,953.80 msec task-clock:u # 0.998 CPUs utilized ( +- 0.09% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 353,855 page-faults:u # 0.040 M/sec ( +- 0.00% ) 35,775,730,539 cycles:u # 3.996 GHz ( +- 0.07% ) (83.33%) 579,534,836 stalled-cycles-frontend:u # 1.62% frontend cycles idle ( +- 2.21% ) (83.33%) 5,719,840,144 stalled-cycles-backend:u # 15.99% backend cycles idle ( +- 0.93% ) (83.33%) 73,035,744,786 instructions:u # 2.04 insn per cycle # 0.08 stalled cycles per insn ( +- 0.02% ) (83.34%) 16,798,017,844 branches:u # 1876.077 M/sec ( +- 0.05% ) (83.33%) 237,777,143 branch-misses:u # 1.42% of all branches ( +- 0.15% ) (83.34%) 8.97077 +- 0.00803 seconds time elapsed ( +- 0.09% ) $ Indeed, about 10% shaved, not bad. Signed-off-by: Andrii Nakryiko <andriin@fb.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Andrii Nakryiko <andrii@kernel.org> Cc: Hao Luo <haoluo@google.com> Cc: Oleg Rombakh <olegrom@google.com> Cc: bpf@vger.kernel.org Cc: dwarves@vger.kernel.org Cc: kernel-team@fb.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
0258a47ef9
commit
2e719cca66
246
btf_encoder.c
246
btf_encoder.c
|
@ -17,6 +17,7 @@
|
|||
#include "btf_encoder.h"
|
||||
|
||||
#include <ctype.h> /* for isalpha() and isalnum() */
|
||||
#include <stdlib.h> /* for qsort() and bsearch() */
|
||||
#include <inttypes.h>
|
||||
|
||||
/*
|
||||
|
@ -53,18 +54,18 @@ static bool btf_name_valid(const char *p)
|
|||
return !*p;
|
||||
}
|
||||
|
||||
static void dump_invalid_symbol(const char *msg, const char *sym, const char *cu,
|
||||
static void dump_invalid_symbol(const char *msg, const char *sym,
|
||||
int verbose, bool force)
|
||||
{
|
||||
if (force) {
|
||||
if (verbose)
|
||||
fprintf(stderr, "PAHOLE: Warning: %s, ignored (sym: '%s', cu: '%s').\n",
|
||||
msg, sym, cu);
|
||||
fprintf(stderr, "PAHOLE: Warning: %s, ignored (sym: '%s').\n",
|
||||
msg, sym);
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "PAHOLE: Error: %s (sym: '%s', cu: '%s').\n", msg, sym, cu);
|
||||
fprintf(stderr, "PAHOLE: Error: Use '-j' or '--force' to ignore such symbols and force emit the btf.\n");
|
||||
fprintf(stderr, "PAHOLE: Error: %s (sym: '%s').\n", msg, sym);
|
||||
fprintf(stderr, "PAHOLE: Error: Use '--btf_encode_force' to ignore such symbols and force emit the btf.\n");
|
||||
}
|
||||
|
||||
extern struct debug_fmt_ops *dwarves__active_loader;
|
||||
|
@ -202,6 +203,9 @@ int btf_encoder__encode()
|
|||
{
|
||||
int err;
|
||||
|
||||
if (gobuffer__size(&btfe->percpu_secinfo) != 0)
|
||||
btf_elf__add_datasec_type(btfe, PERCPU_SECTION, &btfe->percpu_secinfo);
|
||||
|
||||
err = btf_elf__encode(btfe, 0);
|
||||
btf_elf__delete(btfe);
|
||||
btfe = NULL;
|
||||
|
@ -209,24 +213,117 @@ int btf_encoder__encode()
|
|||
return err;
|
||||
}
|
||||
|
||||
#define HASHADDR__BITS 8
|
||||
#define HASHADDR__SIZE (1UL << HASHADDR__BITS)
|
||||
#define hashaddr__fn(key) hash_64(key, HASHADDR__BITS)
|
||||
#define MAX_PERCPU_VAR_CNT 4096
|
||||
|
||||
static struct variable *hashaddr__find_variable(const struct hlist_head hashtable[],
|
||||
const uint64_t addr)
|
||||
struct var_info {
|
||||
uint64_t addr;
|
||||
uint32_t sz;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
static struct var_info percpu_vars[MAX_PERCPU_VAR_CNT];
|
||||
static int percpu_var_cnt;
|
||||
|
||||
static int percpu_var_cmp(const void *_a, const void *_b)
|
||||
{
|
||||
struct variable *variable;
|
||||
struct hlist_node *pos;
|
||||
uint16_t bucket = hashaddr__fn(addr);
|
||||
const struct hlist_head *head = &hashtable[bucket];
|
||||
const struct var_info *a = _a;
|
||||
const struct var_info *b = _b;
|
||||
|
||||
hlist_for_each_entry(variable, pos, head, tool_hnode) {
|
||||
if (variable->ip.addr == addr)
|
||||
return variable;
|
||||
if (a->addr == b->addr)
|
||||
return 0;
|
||||
return a->addr < b->addr ? -1 : 1;
|
||||
}
|
||||
|
||||
static bool percpu_var_exists(uint64_t addr, uint32_t *sz, const char **name)
|
||||
{
|
||||
const struct var_info *p;
|
||||
struct var_info key = { .addr = addr };
|
||||
|
||||
p = bsearch(&key, percpu_vars, percpu_var_cnt,
|
||||
sizeof(percpu_vars[0]), percpu_var_cmp);
|
||||
|
||||
if (!p)
|
||||
return false;
|
||||
|
||||
*sz = p->sz;
|
||||
*name = p->name;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int find_all_percpu_vars(struct btf_elf *btfe)
|
||||
{
|
||||
uint32_t core_id;
|
||||
GElf_Sym sym;
|
||||
|
||||
/* cache variables' addresses, preparing for searching in symtab. */
|
||||
percpu_var_cnt = 0;
|
||||
|
||||
/* search within symtab for percpu variables */
|
||||
elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
|
||||
const char *sym_name;
|
||||
uint64_t addr;
|
||||
uint32_t size;
|
||||
|
||||
/* compare a symbol's shndx to determine if it's a percpu variable */
|
||||
if (elf_sym__section(&sym) != btfe->percpu_shndx)
|
||||
continue;
|
||||
if (elf_sym__type(&sym) != STT_OBJECT)
|
||||
continue;
|
||||
|
||||
addr = elf_sym__value(&sym);
|
||||
/*
|
||||
* Store only those symbols that have allocated space in the percpu section.
|
||||
* This excludes the following three types of symbols:
|
||||
*
|
||||
* 1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
|
||||
* 2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
|
||||
* 3. __exitcall(fn), functions which are labeled as exit calls.
|
||||
*
|
||||
* In addition, the variables defined using DEFINE_PERCPU_FIRST are
|
||||
* also not included, which currently includes:
|
||||
*
|
||||
* 1. fixed_percpu_data
|
||||
*/
|
||||
if (!addr)
|
||||
continue;
|
||||
|
||||
sym_name = elf_sym__name(&sym, btfe->symtab);
|
||||
if (!btf_name_valid(sym_name)) {
|
||||
dump_invalid_symbol("Found symbol of invalid name when encoding btf",
|
||||
sym_name, btf_elf__verbose, btf_elf__force);
|
||||
if (btf_elf__force)
|
||||
continue;
|
||||
return -1;
|
||||
}
|
||||
size = elf_sym__size(&sym);
|
||||
if (!size) {
|
||||
dump_invalid_symbol("Found symbol of zero size when encoding btf",
|
||||
sym_name, btf_elf__verbose, btf_elf__force);
|
||||
if (btf_elf__force)
|
||||
continue;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (btf_elf__verbose)
|
||||
printf("Found per-CPU symbol '%s' at address 0x%lx\n", sym_name, addr);
|
||||
|
||||
if (percpu_var_cnt == MAX_PERCPU_VAR_CNT) {
|
||||
fprintf(stderr, "Reached the limit of per-CPU variables: %d\n",
|
||||
MAX_PERCPU_VAR_CNT);
|
||||
return -1;
|
||||
}
|
||||
percpu_vars[percpu_var_cnt].addr = addr;
|
||||
percpu_vars[percpu_var_cnt].sz = size;
|
||||
percpu_vars[percpu_var_cnt].name = sym_name;
|
||||
percpu_var_cnt++;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
if (percpu_var_cnt)
|
||||
qsort(percpu_vars, percpu_var_cnt, sizeof(percpu_vars[0]), percpu_var_cmp);
|
||||
|
||||
if (btf_elf__verbose)
|
||||
printf("Found %d per-CPU variables!\n", percpu_var_cnt);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
||||
|
@ -234,13 +331,10 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
|||
{
|
||||
uint32_t type_id_off;
|
||||
uint32_t core_id;
|
||||
struct variable *var;
|
||||
struct function *fn;
|
||||
struct tag *pos;
|
||||
int err = 0;
|
||||
struct hlist_head hash_addr[HASHADDR__SIZE];
|
||||
struct variable *var;
|
||||
bool has_global_var = false;
|
||||
GElf_Sym sym;
|
||||
|
||||
if (btfe && strcmp(btfe->filename, cu->filename)) {
|
||||
err = btf_encoder__encode();
|
||||
|
@ -257,6 +351,9 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
|||
if (!btfe)
|
||||
return -1;
|
||||
|
||||
if (!skip_encoding_vars && find_all_percpu_vars(btfe))
|
||||
goto out;
|
||||
|
||||
has_index_type = false;
|
||||
need_index_type = false;
|
||||
array_index_id = 0;
|
||||
|
@ -278,6 +375,7 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
|||
}
|
||||
|
||||
btf_elf__verbose = verbose;
|
||||
btf_elf__force = force;
|
||||
type_id_off = btf__get_nr_types(btfe->btf);
|
||||
|
||||
cu__for_each_type(cu, core_id, pos) {
|
||||
|
@ -325,12 +423,11 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
|||
if (verbose)
|
||||
printf("search cu '%s' for percpu global variables.\n", cu->name);
|
||||
|
||||
/* cache variables' addresses, preparing for searching in symtab. */
|
||||
for (core_id = 0; core_id < HASHADDR__SIZE; ++core_id)
|
||||
INIT_HLIST_HEAD(&hash_addr[core_id]);
|
||||
|
||||
cu__for_each_variable(cu, core_id, pos) {
|
||||
struct hlist_head *head;
|
||||
uint32_t size, type, linkage, offset;
|
||||
const char *name;
|
||||
uint64_t addr;
|
||||
int id;
|
||||
|
||||
var = tag__variable(pos);
|
||||
if (var->declaration && !var->spec)
|
||||
|
@ -338,89 +435,37 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
|||
/* percpu variables are allocated in global space */
|
||||
if (variable__scope(var) != VSCOPE_GLOBAL && !var->spec)
|
||||
continue;
|
||||
has_global_var = true;
|
||||
head = &hash_addr[hashaddr__fn(var->ip.addr)];
|
||||
hlist_add_head(&var->tool_hnode, head);
|
||||
}
|
||||
if (!has_global_var) {
|
||||
if (verbose)
|
||||
printf("cu has no global variable defined, skip.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* search within symtab for percpu variables */
|
||||
elf_symtab__for_each_symbol(btfe->symtab, core_id, sym) {
|
||||
uint32_t linkage, type, size, offset;
|
||||
int32_t btf_var_id, btf_var_secinfo_id;
|
||||
uint64_t addr;
|
||||
const char *sym_name;
|
||||
|
||||
/* compare a symbol's shndx to determine if it's a percpu variable */
|
||||
if (elf_sym__section(&sym) != btfe->percpu_shndx)
|
||||
continue;
|
||||
if (elf_sym__type(&sym) != STT_OBJECT)
|
||||
continue;
|
||||
|
||||
addr = elf_sym__value(&sym);
|
||||
/*
|
||||
* Store only those symbols that have allocated space in the percpu section.
|
||||
* This excludes the following three types of symbols:
|
||||
*
|
||||
* 1. __ADDRESSABLE(sym), which are forcely emitted as symbols.
|
||||
* 2. __UNIQUE_ID(prefix), which are introduced to generate unique ids.
|
||||
* 3. __exitcall(fn), functions which are labeled as exit calls.
|
||||
*
|
||||
* In addition, the variables defined using DEFINE_PERCPU_FIRST are
|
||||
* also not included, which currently includes:
|
||||
*
|
||||
* 1. fixed_percpu_data
|
||||
*/
|
||||
if (!addr)
|
||||
continue;
|
||||
var = hashaddr__find_variable(hash_addr, addr);
|
||||
if (var == NULL)
|
||||
continue;
|
||||
/* addr has to be recorded before we follow spec */
|
||||
addr = var->ip.addr;
|
||||
if (var->spec)
|
||||
var = var->spec;
|
||||
|
||||
sym_name = elf_sym__name(&sym, btfe->symtab);
|
||||
if (!btf_name_valid(sym_name)) {
|
||||
dump_invalid_symbol("Found symbol of invalid name when encoding btf",
|
||||
sym_name, cu->name, verbose, force);
|
||||
if (force)
|
||||
continue;
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
if (var->ip.tag.type == 0) {
|
||||
dump_invalid_symbol("Found symbol of void type when encoding btf",
|
||||
sym_name, cu->name, verbose, force);
|
||||
if (force)
|
||||
continue;
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
type = type_id_off + var->ip.tag.type;
|
||||
size = elf_sym__size(&sym);
|
||||
if (!size) {
|
||||
dump_invalid_symbol("Found symbol of zero size when encoding btf",
|
||||
sym_name, cu->name, verbose, force);
|
||||
fprintf(stderr, "error: found variable in CU '%s' that has void type\n",
|
||||
cu->name);
|
||||
if (force)
|
||||
continue;
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (verbose)
|
||||
printf("symbol '%s' of address 0x%lx encoded\n",
|
||||
sym_name, addr);
|
||||
type = var->ip.tag.type + type_id_off;
|
||||
linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
|
||||
if (!percpu_var_exists(addr, &size, &name))
|
||||
continue; /* not a per-CPU variable */
|
||||
|
||||
if (btf_elf__verbose) {
|
||||
printf("Variable '%s' from CU '%s' at address 0x%lx encoded\n",
|
||||
name, cu->name, addr);
|
||||
}
|
||||
|
||||
/* add a BTF_KIND_VAR in btfe->types */
|
||||
linkage = var->external ? BTF_VAR_GLOBAL_ALLOCATED : BTF_VAR_STATIC;
|
||||
btf_var_id = btf_elf__add_var_type(btfe, type, sym_name, linkage);
|
||||
if (btf_var_id < 0) {
|
||||
id = btf_elf__add_var_type(btfe, type, name, linkage);
|
||||
if (id < 0) {
|
||||
err = -1;
|
||||
printf("error: failed to encode variable '%s'\n", sym_name);
|
||||
fprintf(stderr, "error: failed to encode variable '%s' at addr 0x%lx\n",
|
||||
name, addr);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -428,13 +473,12 @@ int cu__encode_btf(struct cu *cu, int verbose, bool force,
|
|||
* add a BTF_VAR_SECINFO in btfe->percpu_secinfo, which will be added into
|
||||
* btfe->types later when we add BTF_VAR_DATASEC.
|
||||
*/
|
||||
type = btf_var_id;
|
||||
offset = addr - btfe->percpu_base_addr;
|
||||
btf_var_secinfo_id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo,
|
||||
type, offset, size);
|
||||
if (btf_var_secinfo_id < 0) {
|
||||
id = btf_elf__add_var_secinfo(&btfe->percpu_secinfo, id, offset, size);
|
||||
if (id < 0) {
|
||||
err = -1;
|
||||
printf("error: failed to encode var secinfo '%s'\n", sym_name);
|
||||
fprintf(stderr, "error: failed to encode section info for variable '%s' at addr 0x%lx\n",
|
||||
name, addr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
6
libbtf.c
6
libbtf.c
|
@ -28,6 +28,7 @@
|
|||
#include "elf_symtab.h"
|
||||
|
||||
uint8_t btf_elf__verbose;
|
||||
uint8_t btf_elf__force;
|
||||
|
||||
static int btf_var_secinfo_cmp(const void *a, const void *b)
|
||||
{
|
||||
|
@ -62,7 +63,6 @@ int btf_elf__load(struct btf_elf *btfe)
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
struct btf_elf *btf_elf__new(const char *filename, Elf *elf)
|
||||
{
|
||||
struct btf_elf *btfe = zalloc(sizeof(*btfe));
|
||||
|
@ -771,10 +771,6 @@ int btf_elf__encode(struct btf_elf *btfe, uint8_t flags)
|
|||
{
|
||||
struct btf *btf = btfe->btf;
|
||||
|
||||
if (gobuffer__size(&btfe->percpu_secinfo) != 0)
|
||||
btf_elf__add_datasec_type(btfe, PERCPU_SECTION,
|
||||
&btfe->percpu_secinfo);
|
||||
|
||||
/* Empty file, nothing to do, so... done! */
|
||||
if (btf__get_nr_types(btf) == 0)
|
||||
return 0;
|
||||
|
|
Loading…
Reference in New Issue