dwarves/libctf.h
Andrii Nakryiko 29fce8dc85 strings: use BTF's string APIs for strings management
Switch strings container to using struct btf and its
btf__add_str()/btf__find_str() APIs, which do equivalent internal string
deduplication. This turns out to be very significantly faster than using
tsearch functions. To satisfy the CTF encoding use case, a hacky string size
fetching approach is utilized, as libbpf doesn't provide direct API to get
total string section size and to copy over just strings data section.

BEFORE:
         22,624.28 msec task-clock                #    1.000 CPUs utilized
                85      context-switches          #    0.004 K/sec
                 3      cpu-migrations            #    0.000 K/sec
           622,545      page-faults               #    0.028 M/sec
    68,177,206,387      cycles                    #    3.013 GHz                      (24.99%)
   114,370,031,619      instructions              #    1.68  insn per cycle           (25.01%)
    26,125,001,179      branches                  # 1154.733 M/sec                    (25.01%)
       458,861,243      branch-misses             #    1.76% of all branches          (25.00%)
    24,533,455,967      L1-dcache-loads           # 1084.386 M/sec                    (25.02%)
       973,500,214      L1-dcache-load-misses     #    3.97% of all L1-dcache hits    (25.05%)
       338,773,561      LLC-loads                 #   14.974 M/sec                    (25.02%)
        12,651,196      LLC-load-misses           #    3.73% of all LL-cache hits     (25.00%)

      22.628910615 seconds time elapsed

      21.341063000 seconds user
       1.283763000 seconds sys

AFTER:
         18,362.97 msec task-clock                #    1.000 CPUs utilized
                37      context-switches          #    0.002 K/sec
                 0      cpu-migrations            #    0.000 K/sec
           626,281      page-faults               #    0.034 M/sec
    52,480,619,000      cycles                    #    2.858 GHz                      (25.00%)
   104,736,434,384      instructions              #    2.00  insn per cycle           (25.01%)
    23,878,428,465      branches                  # 1300.358 M/sec                    (25.01%)
       252,669,685      branch-misses             #    1.06% of all branches          (25.03%)
    21,829,390,952      L1-dcache-loads           # 1188.772 M/sec                    (25.04%)
       638,086,339      L1-dcache-load-misses     #    2.92% of all L1-dcache hits    (25.02%)
       212,327,435      LLC-loads                 #   11.563 M/sec                    (25.00%)
        14,578,117      LLC-load-misses           #    6.87% of all LL-cache hits     (25.00%)

      18.364427347 seconds time elapsed

      16.985494000 seconds user
       1.377959000 seconds sys

Committer testing:

Before:

  $ perf stat -r5 pahole -J vmlinux

   Performance counter stats for 'pahole -J vmlinux' (5 runs):

            8,735.92 msec task-clock:u              #    0.998 CPUs utilized            ( +-  0.34% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             353,978      page-faults:u             #    0.041 M/sec                    ( +-  0.00% )
      34,722,167,335      cycles:u                  #    3.975 GHz                      ( +-  0.12% )  (83.33%)
         555,981,118      stalled-cycles-frontend:u #    1.60% frontend cycles idle     ( +-  1.53% )  (83.33%)
       5,215,370,531      stalled-cycles-backend:u  #   15.02% backend cycles idle      ( +-  1.31% )  (83.33%)
      72,615,773,119      instructions:u            #    2.09  insn per cycle
                                                    #    0.07  stalled cycles per insn  ( +-  0.02% )  (83.34%)
      16,624,959,121      branches:u                # 1903.057 M/sec                    ( +-  0.01% )  (83.33%)
         229,962,327      branch-misses:u           #    1.38% of all branches          ( +-  0.07% )  (83.33%)

              8.7503 +- 0.0301 seconds time elapsed  ( +-  0.34% )

  $

After:

  $ perf stat -r5 pahole -J vmlinux

   Performance counter stats for 'pahole -J vmlinux' (5 runs):

            7,302.31 msec task-clock:u              #    0.998 CPUs utilized            ( +-  1.16% )
                   0      context-switches:u        #    0.000 K/sec
                   0      cpu-migrations:u          #    0.000 K/sec
             355,884      page-faults:u             #    0.049 M/sec                    ( +-  0.00% )
      29,150,861,078      cycles:u                  #    3.992 GHz                      ( +-  0.35% )  (83.33%)
         478,705,326      stalled-cycles-frontend:u #    1.64% frontend cycles idle     ( +-  2.70% )  (83.33%)
       5,351,001,796      stalled-cycles-backend:u  #   18.36% backend cycles idle      ( +-  1.20% )  (83.33%)
      65,835,888,022      instructions:u            #    2.26  insn per cycle
                                                    #    0.08  stalled cycles per insn  ( +-  0.03% )  (83.33%)
      15,025,195,460      branches:u                # 2057.594 M/sec                    ( +-  0.05% )  (83.34%)
         141,209,214      branch-misses:u           #    0.94% of all branches          ( +-  0.15% )  (83.33%)

              7.3140 +- 0.0851 seconds time elapsed  ( +-  1.16% )

  $

16.04% fewer cycles, keep the patches coming! :-)

Had to add this patch tho:

  +++ b/dwarf_loader.c
  @@ -2159,7 +2159,7 @@ static unsigned long long dwarf_tag__orig_id(const struct tag *tag,
   static const char *dwarf__strings_ptr(const struct cu *cu __unused,
   				      strings_t s)
   {
  -	return strings__ptr(strings, s);
  +	return s ? strings__ptr(strings, s) : NULL;
   }

To keep the preexisting behaviour and to do what the BTF-specific
strings_ptr method does:

  static const char *btf_elf__strings_ptr(const struct cu *cu, strings_t s)
  {
          return btf_elf__string(cu->priv, s);
  }

  const char *btf_elf__string(struct btf_elf *btfe, uint32_t ref)
  {
          const char *s = btf__str_by_offset(btfe->btf, ref);

          return s && s[0] == '\0' ? NULL : s;
  }

With these adjustments, btfdiff on a vmlinux with BTF and DWARF is again
clean, i.e. pretty printing from BTF matches what we get when using
DWARF.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: bpf@vger.kernel.org
Cc: dwarves@vger.kernel.org
Cc: kernel-team@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2020-10-20 17:17:51 -03:00

116 lines
3.6 KiB
C

/*
SPDX-License-Identifier: GPL-2.0-only
Copyright (C) 2019 Arnaldo Carvalho de Melo <acme@redhat.com>
*/
#ifndef _LIBCTF_H
#define _LIBCTF_H
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include <elf.h>
#include "gobuffer.h"
#include "elf_symtab.h"
struct ctf {
void *buf;
void *priv;
Elf *elf;
struct elf_symtab *symtab;
GElf_Ehdr ehdr;
struct gobuffer objects; /* data/variables */
struct gobuffer types;
struct gobuffer funcs;
struct strings *strings;
char *filename;
size_t size;
int swapped;
int in_fd;
uint8_t wordsize;
uint32_t type_index;
};
struct ctf *ctf__new(const char *filename, Elf *elf);
void ctf__delete(struct ctf *ctf);
bool ctf__ignore_symtab_function(const GElf_Sym *sym, const char *sym_name);
bool ctf__ignore_symtab_object(const GElf_Sym *sym, const char *sym_name);
int ctf__load(struct ctf *ctf);
uint16_t ctf__get16(struct ctf *ctf, uint16_t *p);
uint32_t ctf__get32(struct ctf *ctf, uint32_t *p);
void ctf__put16(struct ctf *ctf, uint16_t *p, uint16_t val);
void ctf__put32(struct ctf *ctf, uint32_t *p, uint32_t val);
void *ctf__get_buffer(struct ctf *ctf);
size_t ctf__get_size(struct ctf *ctf);
int ctf__load_symtab(struct ctf *ctf);
uint32_t ctf__add_base_type(struct ctf *ctf, uint32_t name, uint16_t size);
uint32_t ctf__add_fwd_decl(struct ctf *ctf, uint32_t name);
uint32_t ctf__add_short_type(struct ctf *ctf, uint16_t kind, uint16_t type, uint32_t name);
void ctf__add_short_member(struct ctf *ctf, uint32_t name, uint16_t type,
uint16_t offset, int64_t *position);
void ctf__add_full_member(struct ctf *ctf, uint32_t name, uint16_t type,
uint64_t offset, int64_t *position);
uint32_t ctf__add_struct(struct ctf *ctf, uint16_t kind, uint32_t name,
uint64_t size, uint16_t nr_members, int64_t *position);
uint32_t ctf__add_array(struct ctf *ctf, uint16_t type, uint16_t index_type, uint32_t nelems);
void ctf__add_parameter(struct ctf *ctf, uint16_t type, int64_t *position);
uint32_t ctf__add_function_type(struct ctf *ctf, uint16_t type,
uint16_t nr_parms, bool varargs, int64_t *position);
uint32_t ctf__add_enumeration_type(struct ctf *ctf, uint32_t name, uint16_t size,
uint16_t nr_entries, int64_t *position);
void ctf__add_enumerator(struct ctf *ctf, uint32_t name, uint32_t value,
int64_t *position);
void ctf__add_function_parameter(struct ctf *ctf, uint16_t type,
int64_t *position);
int ctf__add_function(struct ctf *ctf, uint16_t type, uint16_t nr_parms,
bool varargs, int64_t *position);
int ctf__add_object(struct ctf *ctf, uint16_t type);
void ctf__set_strings(struct ctf *ctf, struct strings *strings);
int ctf__encode(struct ctf *ctf, uint8_t flags);
char *ctf__string(struct ctf *ctf, uint32_t ref);
/**
* ctf__for_each_symtab_function - iterate thru all the symtab functions
*
* @ctf: struct ctf instance to iterate
* @index: uint32_t index
* @sym: GElf_Sym iterator
*/
#define ctf__for_each_symtab_function(ctf, index, sym) \
elf_symtab__for_each_symbol(ctf->symtab, index, sym) \
if (ctf__ignore_symtab_function(&sym, \
elf_sym__name(&sym, \
ctf->symtab))) \
continue; \
else
/**
* ctf__for_each_symtab_object - iterate thru all the symtab objects
*
* @ctf: struct ctf instance to iterate
* @index: uint32_t index
* @sym: GElf_Sym iterator
*/
#define ctf__for_each_symtab_object(ctf, index, sym) \
elf_symtab__for_each_symbol(ctf->symtab, index, sym) \
if (ctf__ignore_symtab_object(&sym, \
elf_sym__name(&sym, \
ctf->symtab))) \
continue; \
else
#endif /* _LIBCTF_H */