2007-12-03 17:43:49 +01:00
|
|
|
# cmake_minimum_required() must come before project(): it establishes the
# policy defaults that project() relies on while configuring the C toolchain.
cmake_minimum_required(VERSION 2.8.12)
project(pahole C)

# CMP0005 NEW: values of preprocessor definitions are escaped automatically.
cmake_policy(SET CMP0005 NEW)
|
2007-12-03 17:43:49 +01:00
|
|
|
|
2021-01-04 23:16:22 +01:00
|
|
|
# LIBBPF_EMBEDDED=ON (the default) uses the libbpf copy shipped under lib/bpf.
# LIBBPF_EMBEDDED=OFF asks pkg-config for a system libbpf (>= 0.3.0) instead.
option(LIBBPF_EMBEDDED "Use the embedded version of libbpf instead of searching it via pkg-config" ON)
if(NOT LIBBPF_EMBEDDED)
  find_package(PkgConfig)
  if(PKGCONFIG_FOUND)
    pkg_check_modules(LIBBPF libbpf>=0.3.0)
  endif()
  # LIBBPF_FOUND stays unset when pkg-config or a suitable libbpf is missing.
  # The rest of this file then uses the embedded copy, so report the switch
  # instead of performing it silently.
  if(NOT LIBBPF_FOUND)
    message(WARNING "System libbpf (>= 0.3.0) not found via pkg-config, falling back to the embedded libbpf")
  endif()
endif()
|
|
|
|
|
2020-10-24 20:36:53 +02:00
|
|
|
# Make both the build tree and the source tree available to #include.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR})

if(LIBBPF_FOUND)
  # A system libbpf was located through pkg-config: use the header and
  # library search paths it reported.
  include_directories(${LIBBPF_INCLUDE_DIRS})
  link_directories(${LIBBPF_LIBRARY_DIRS})
else()
  # Embedded libbpf: add the in-tree header directories so that the same
  # 'system' style #include works with both the embedded and a system libbpf.
  include_directories(${CMAKE_CURRENT_SOURCE_DIR}/lib/include)
  include_directories(${CMAKE_CURRENT_SOURCE_DIR}/lib/bpf/include/uapi)
endif()
|
2006-12-12 03:37:23 +01:00
|
|
|
|
2007-01-30 22:07:28 +01:00
|
|
|
# TODO: work this out automatically later; Helio showed a KDE4 example of how
# to support x86-64 builds.
#
# Installation directories follow. __LIB is a user-settable cache string; it
# is used further down as the directory component placed after the install
# prefix when LIB_INSTALL_DIR is computed.
set(__LIB
    ""
    CACHE STRING "Define suffix of directory name (32/64)")
|
|
|
|
|
2007-02-25 20:43:59 +01:00
|
|
|
# _set_fancy(<var> <value> <comment>)
#
# When <var> is already defined, store its current value in the cache as a
# PATH entry described by <comment>. Otherwise give <var> the default <value>
# as a normal (non-cache) variable.
macro(_set_fancy _var _value _comment)
  if(DEFINED ${_var})
    set(${_var} "${${_var}}" CACHE PATH "${_comment}")
  else()
    set(${_var} ${_value})
  endif()
endmacro()
|
2007-01-30 22:07:28 +01:00
|
|
|
|
2006-12-12 03:37:23 +01:00
|
|
|
# Where to look first for CMake modules, before ${CMAKE_ROOT}/Modules/ is
# checked. The project's own cmake/modules directory is put in front of any
# module path the user already supplied instead of discarding it, and
# PROJECT_SOURCE_DIR keeps the path correct when this project is built as a
# subproject of a larger tree.
set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules" ${CMAKE_MODULE_PATH})
|
|
|
|
|
2007-04-26 16:02:20 +02:00
|
|
|
# Fall back to a Debug build when no build type was requested. FORCE is needed
# so the cache entry is written even if an empty one already exists.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug
      CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
      FORCE)
endif()
|
2006-12-23 16:16:30 +01:00
|
|
|
|
2020-07-23 21:36:56 +02:00
|
|
|
# C flags for Debug builds: all warnings on and treated as errors, gdb debug
# information, optimization disabled.
set(CMAKE_C_FLAGS_DEBUG "-Wall -Werror -ggdb -O0")
|
dwarves: Add -O2 to CFLAGS
Using the defaults, which uses no -O flags:
[acme@quaco pahole]$ grep CMAKE_C_FLAGS_RELEASE CMakeLists.txt
set(CMAKE_C_FLAGS_RELEASE "-Wall")
[acme@quaco pahole]$ perf stat -r5 pahole -J vmlinux
Performance counter stats for 'pahole -J vmlinux' (5 runs):
18,516.09 msec task-clock:u # 1.000 CPUs utilized ( +- 0.55% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
548,753 page-faults:u # 0.030 M/sec ( +- 0.00% )
68,498,897,915 cycles:u # 3.699 GHz ( +- 0.05% )
123,209,200,210 instructions:u # 1.80 insn per cycle ( +- 0.00% )
25,484,050,414 branches:u # 1376.319 M/sec ( +- 0.00% )
366,334,745 branch-misses:u # 1.44% of all branches ( +- 0.03% )
18.518 +- 0.102 seconds time elapsed ( +- 0.55% )
[acme@quaco pahole]$
With this patch, using -O2:
[acme@quaco pahole]$ grep CMAKE_C_FLAGS_RELEASE CMakeLists.txt
set(CMAKE_C_FLAGS_RELEASE "-Wall -O2")
[acme@quaco pahole]$ perf stat -r5 pahole -J vmlinux
Performance counter stats for 'pahole -J vmlinux' (5 runs):
12,645.96 msec task-clock:u # 1.000 CPUs utilized ( +- 0.61% )
0 context-switches:u # 0.000 K/sec
0 cpu-migrations:u # 0.000 K/sec
548,744 page-faults:u # 0.043 M/sec ( +- 0.00% )
45,359,248,873 cycles:u # 3.587 GHz ( +- 0.03% )
84,051,005,257 instructions:u # 1.85 insn per cycle ( +- 0.00% )
19,755,005,738 branches:u # 1562.159 M/sec ( +- 0.00% )
365,276,883 branch-misses:u # 1.85% of all branches ( +- 0.12% )
12.6471 +- 0.0771 seconds time elapsed ( +- 0.61% )
[acme@quaco pahole]$
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@fb.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2019-12-16 15:15:54 +01:00
|
|
|
# C flags for Release builds: all warnings on, -O2 optimization.
set(CMAKE_C_FLAGS_RELEASE "-Wall -O2")
|
2019-07-02 17:33:29 +02:00
|
|
|
|
CMakeLists.txt: Enable SHARED and STATIC lib creation
CMakeLists.txt does not allow creation of static library and link applications
accordingly.
Creation of SHARED and STATIC should be allowed using -DBUILD_SHARED_LIBS
If -DBUILD_SHARED_LIBS option is not supplied, CMakeLists.txt sets it to ON.
Ex:
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=OFF ..
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=ON ..
Committer testing:
I had to fix it up a bit due to changes related to allowing building with
libbpf-devel, test results:
With the default, i.e. creating libdwarves.so and using it:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,612.69 msec task-clock:u # 1.014 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,702 page-faults:u # 91.330 K/sec ( +- 0.00% )
33,720,048,514 cycles:u # 3.970 GHz ( +- 0.38% ) (83.33%)
689,752,139 stalled-cycles-frontend:u # 2.05% frontend cycles idle ( +- 2.57% ) (83.33%)
5,070,723,369 stalled-cycles-backend:u # 15.04% backend cycles idle ( +- 1.20% ) (83.34%)
77,270,640,084 instructions:u # 2.29 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.01% ) (83.34%)
18,164,028,242 branches:u # 2.139 G/sec ( +- 0.04% ) (83.33%)
150,194,338 branch-misses:u # 0.83% of all branches ( +- 0.17% ) (83.32%)
8.4979 +- 0.0590 seconds time elapsed ( +- 0.69% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,001.79 msec task-clock:u # 1.006 CPUs utilized ( +- 1.02% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,980 page-faults:u # 104.736 K/sec ( +- 0.00% )
26,912,307,448 cycles:u # 3.872 GHz ( +- 0.31% ) (83.33%)
509,467,882 stalled-cycles-frontend:u # 1.90% frontend cycles idle ( +- 2.44% ) (83.33%)
3,602,777,263 stalled-cycles-backend:u # 13.43% backend cycles idle ( +- 1.34% ) (83.32%)
66,192,815,701 instructions:u # 2.47 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.01% ) (83.35%)
15,753,663,095 branches:u # 2.267 G/sec ( +- 0.02% ) (83.35%)
98,345,696 branch-misses:u # 0.62% of all branches ( +- 0.14% ) (83.34%)
6.9586 +- 0.0720 seconds time elapsed ( +- 1.03% )
⬢[acme@toolbox pahole]$
Then building with:
⬢[acme@toolbox pahole]$ rm -rf build ; mkdir build ; cd build ; cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. ; cd .. ; make -C build
⬢[acme@toolbox pahole]$ ldd build/pahole
linux-vdso.so.1 (0x00007ffff0172000)
libdw.so.1 => /lib64/libdw.so.1 (0x00007fdd3905d000)
libelf.so.1 => /lib64/libelf.so.1 (0x00007fdd39042000)
libz.so.1 => /lib64/libz.so.1 (0x00007fdd39028000)
libc.so.6 => /lib64/libc.so.6 (0x00007fdd38e59000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007fdd38e52000)
libzstd.so.1 => /lib64/libzstd.so.1 (0x00007fdd38d5c000)
liblzma.so.5 => /lib64/liblzma.so.5 (0x00007fdd38d2e000)
libbz2.so.1 => /lib64/libbz2.so.1 (0x00007fdd38d1b000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fdd38cfa000)
/lib64/ld-linux-x86-64.so.2 (0x00007fdd390ff000)
⬢[acme@toolbox pahole]$
We get some performance improvement:
- First is encoding detached BTF, i.e. read DWARF, generate BTF.
- Second is about loading both DWARF and BTF, producing output for both, that must match.
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,566.34 msec task-clock:u # 1.025 CPUs utilized ( +- 0.85% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,685 page-faults:u # 92.865 K/sec ( +- 0.00% )
33,333,991,512 cycles:u # 3.991 GHz ( +- 0.23% ) (83.34%)
799,187,919 stalled-cycles-frontend:u # 2.42% frontend cycles idle ( +- 1.95% ) (83.33%)
5,157,722,792 stalled-cycles-backend:u # 15.61% backend cycles idle ( +- 1.25% ) (83.33%)
76,273,972,066 instructions:u # 2.31 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.03% ) (83.33%)
17,843,388,470 branches:u # 2.136 G/sec ( +- 0.05% ) (83.33%)
150,507,690 branch-misses:u # 0.84% of all branches ( +- 0.46% ) (83.33%)
8.3561 +- 0.0729 seconds time elapsed ( +- 0.87% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
6,685.39 msec task-clock:u # 0.986 CPUs utilized ( +- 0.54% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,931 page-faults:u # 107.475 K/sec ( +- 0.00% )
26,149,371,139 cycles:u # 3.861 GHz ( +- 0.15% ) (83.36%)
525,918,808 stalled-cycles-frontend:u # 2.00% frontend cycles idle ( +- 2.96% ) (83.33%)
3,547,589,546 stalled-cycles-backend:u # 13.51% backend cycles idle ( +- 1.28% ) (83.35%)
65,389,507,702 instructions:u # 2.49 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.03% ) (83.29%)
15,553,175,605 branches:u # 2.296 G/sec ( +- 0.02% ) (83.34%)
97,812,971 branch-misses:u # 0.63% of all branches ( +- 0.23% ) (83.34%)
6.7784 +- 0.0359 seconds time elapsed ( +- 0.53% )
⬢[acme@toolbox pahole]$
Signed-off-by: Deepak Kumar Mishra <deepakkumar.mishra@arm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-07 21:20:13 +02:00
|
|
|
# Build shared libraries unless the user made an explicit choice with
# -DBUILD_SHARED_LIBS=ON|OFF; report the default that was picked.
if(NOT DEFINED BUILD_SHARED_LIBS)
  set(BUILD_SHARED_LIBS ON)
  message(STATUS "Setting BUILD_SHARED_LIBS = ${BUILD_SHARED_LIBS}")
endif()
|
|
|
|
|
2020-11-10 16:41:42 +01:00
|
|
|
# Just for grepping, DWARVES_VERSION isn't used anywhere anymore
# add_definitions(-D_GNU_SOURCE -DDWARVES_VERSION="v1.21")

# Version numbers handed to the C sources. _GNU_SOURCE is defined once here
# rather than being repeated in each add_definitions() call.
add_definitions(-D_GNU_SOURCE
                -DDWARVES_MAJOR_VERSION=1
                -DDWARVES_MINOR_VERSION=21)
|
2006-12-12 03:37:23 +01:00
|
|
|
# Locate the DWARF package; REQUIRED aborts configuration when it is missing.
# NOTE(review): presumably resolved by a find module under cmake/modules
# (see CMAKE_MODULE_PATH) - confirm.
find_package(DWARF REQUIRED)
|
2008-03-04 19:37:02 +01:00
|
|
|
# Locate zlib; REQUIRED aborts configuration when it is missing.
find_package(ZLIB REQUIRED)
|
2006-12-12 03:37:23 +01:00
|
|
|
|
2019-02-07 20:00:36 +01:00
|
|
|
# Make sure the git submodule(s) are checked out.
find_package(Git QUIET)
if(GIT_FOUND AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
  # The option is only declared when git is available and the source tree is
  # a git checkout; it lets the user skip the submodule update.
  option(GIT_SUBMODULE "Check submodules during build" ON)
  if(GIT_SUBMODULE)
    message(STATUS "Submodule update")
    execute_process(
      COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
      RESULT_VARIABLE GIT_SUBMOD_RESULT)
    if(NOT GIT_SUBMOD_RESULT EQUAL "0")
      message(FATAL_ERROR "git submodule update --init failed with ${GIT_SUBMOD_RESULT}, please checkout submodules")
    endif()
    # FATAL_ERROR stops processing, so this line is reached only on success.
    message(STATUS "Submodule update - done")
  endif()
endif()
|
2021-01-04 23:16:22 +01:00
|
|
|
# Neither a system libbpf nor the embedded sources are available: stop here.
# The presence of lib/bpf/src/btf.h is used as the sign that the submodule
# has been checked out.
if(NOT (LIBBPF_FOUND OR EXISTS "${PROJECT_SOURCE_DIR}/lib/bpf/src/btf.h"))
  message(FATAL_ERROR "The submodules were not downloaded! GIT_SUBMODULE was turned off or failed. Please update submodules and try again.")
endif()
|
|
|
|
|
2007-04-19 23:01:47 +02:00
|
|
|
# Library install directory: defaults to <prefix>/${__LIB} unless
# LIB_INSTALL_DIR is already defined, in which case _set_fancy caches the
# existing value as a PATH entry labelled "libdir".
# NOTE(review): EXEC_INSTALL_PREFIX is not set anywhere in the visible part of
# this file, so it presumably expands to an empty string - confirm.
_set_fancy(LIB_INSTALL_DIR "${EXEC_INSTALL_PREFIX}${CMAKE_INSTALL_PREFIX}/${__LIB}" "libdir")
|
2007-01-30 22:07:28 +01:00
|
|
|
|
2019-02-07 20:00:36 +01:00
|
|
|
# libbpf uses reallocarray, which is not available in all versions of glibc
|
|
|
|
# libbpf's include/tools/libc_compat.h provides implementation, but needs
|
|
|
|
# COMPAT_NEED_REALLOCARRAY to be set
|
|
|
|
# Probe whether a small program calling reallocarray() builds with this
# toolchain. When it does not, define COMPAT_NEED_REALLOCARRAY for all C
# compilations.
include(CheckCSourceCompiles)
check_c_source_compiles("
#define _GNU_SOURCE
#include <stdlib.h>
int main(void)
{
	return !!reallocarray(NULL, 1, 1);
}
" HAVE_REALLOCARRAY_SUPPORT)

if(NOT HAVE_REALLOCARRAY_SUPPORT)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCOMPAT_NEED_REALLOCARRAY")
endif()
|
|
|
|
|
2020-02-03 08:52:26 +01:00
|
|
|
# Append the large-file feature macros (_LARGEFILE64_SOURCE and
# _FILE_OFFSET_BITS=64) to the C flags used for every build type.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64")
|
|
|
|
|
2021-01-04 23:16:22 +01:00
|
|
|
# Embedded libbpf: compile every C file under lib/bpf/src into the OBJECT
# library 'bpf'. The objects are built as position independent code and see
# libbpf's own include directories privately.
if(NOT LIBBPF_FOUND)
  file(GLOB libbpf_sources "lib/bpf/src/*.c")
  add_library(bpf OBJECT ${libbpf_sources})
  set_target_properties(bpf PROPERTIES POSITION_INDEPENDENT_CODE 1)
  target_include_directories(bpf
    PRIVATE
      ${CMAKE_CURRENT_SOURCE_DIR}/lib/bpf/include
      ${CMAKE_CURRENT_SOURCE_DIR}/lib/bpf/include/uapi)
endif()
|
2019-02-07 20:00:36 +01:00
|
|
|
|
2021-04-18 01:46:08 +02:00
|
|
|
# Source files of the 'dwarves' library; the list continues on the following
# source lines and is consumed by add_library(dwarves ...).
set(dwarves_LIB_SRCS dwarves.c dwarves_fprintf.c gobuffer.c strings.c
|
btf_encoder: Move libbtf.c to btf_encoder.c, the only user of its functions
All those functions now operate on a 'struct btf_encoder' object, there
is no need to make them visible outside the btf_encoder.c source file,
so move them all there and make them static.
This leads to some savings as the compiler is free to optimize further,
inlining stuff used in just one place, etc:
Before, for encoding then reading we have:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,546.56 msec task-clock:u # 0.989 CPUs utilized ( +- 0.71% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,699 page-faults:u # 89.802 K/sec ( +- 0.00% )
34,082,471,148 cycles:u # 3.946 GHz ( +- 0.22% ) (83.33%)
636,039,662 stalled-cycles-frontend:u # 1.87% frontend cycles idle ( +- 1.69% ) (83.33%)
4,895,524,778 stalled-cycles-backend:u # 14.38% backend cycles idle ( +- 2.10% ) (83.33%)
77,379,632,646 instructions:u # 2.27 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.04% ) (83.33%)
18,185,560,802 branches:u # 2.105 G/sec ( +- 0.03% ) (83.34%)
149,715,849 branch-misses:u # 0.82% of all branches ( +- 0.15% ) (83.34%)
8.6412 +- 0.0612 seconds time elapsed ( +- 0.71% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,168.97 msec task-clock:u # 1.016 CPUs utilized ( +- 0.50% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,965 page-faults:u # 103.257 K/sec ( +- 0.00% )
27,339,019,686 cycles:u # 3.878 GHz ( +- 0.17% ) (83.28%)
511,689,773 stalled-cycles-frontend:u # 1.88% frontend cycles idle ( +- 1.84% ) (83.34%)
3,677,090,126 stalled-cycles-backend:u # 13.53% backend cycles idle ( +- 1.47% ) (83.35%)
66,182,032,226 instructions:u # 2.44 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.02% ) (83.35%)
15,747,149,247 branches:u # 2.234 G/sec ( +- 0.02% ) (83.36%)
98,013,024 branch-misses:u # 0.62% of all branches ( +- 0.21% ) (83.33%)
7.0554 +- 0.0357 seconds time elapsed ( +- 0.51% )
⬢[acme@toolbox pahole]$
Then, with this patch:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,280.48 msec task-clock:u # 0.975 CPUs utilized ( +- 0.72% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,699 page-faults:u # 91.481 K/sec ( +- 0.00% )
33,265,078,702 cycles:u # 3.923 GHz ( +- 0.32% ) (83.32%)
725,690,346 stalled-cycles-frontend:u # 2.16% frontend cycles idle ( +- 1.76% ) (83.34%)
4,803,211,469 stalled-cycles-backend:u # 14.33% backend cycles idle ( +- 2.43% ) (83.34%)
77,162,277,929 instructions:u # 2.30 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.06% ) (83.34%)
18,139,715,894 branches:u # 2.139 G/sec ( +- 0.03% ) (83.34%)
149,609,552 branch-misses:u # 0.82% of all branches ( +- 0.16% ) (83.33%)
8.4921 +- 0.0630 seconds time elapsed ( +- 0.74% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,018.11 msec task-clock:u # 1.013 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,949 page-faults:u # 105.207 K/sec ( +- 0.00% )
26,632,191,985 cycles:u # 3.849 GHz ( +- 0.31% ) (83.35%)
496,648,058 stalled-cycles-frontend:u # 1.87% frontend cycles idle ( +- 2.02% ) (83.29%)
3,437,243,040 stalled-cycles-backend:u # 12.92% backend cycles idle ( +- 0.90% ) (83.33%)
66,192,034,237 instructions:u # 2.49 insn per cycle
# 0.05 stalled cycles per insn ( +- 0.03% ) (83.34%)
15,750,883,004 branches:u # 2.276 G/sec ( +- 0.03% ) (83.35%)
97,544,298 branch-misses:u # 0.62% of all branches ( +- 0.12% ) (83.36%)
6.9247 +- 0.0478 seconds time elapsed ( +- 0.69% )
⬢[acme@toolbox pahole]$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-09 16:04:09 +02:00
|
|
|
ctf_encoder.c ctf_loader.c libctf.c btf_encoder.c btf_loader.c
|
2018-03-06 01:53:50 +01:00
|
|
|
dwarf_loader.c dutil.c elf_symtab.c rbtree.c)
|
2021-01-04 23:16:22 +01:00
|
|
|
# With the embedded libbpf, add the object files of the 'bpf' OBJECT library
# to the dwarves library sources.
if(NOT LIBBPF_FOUND)
  set(dwarves_LIB_SRCS ${dwarves_LIB_SRCS} $<TARGET_OBJECTS:bpf>)
endif()
|
CMakeLists.txt: Enable SHARED and STATIC lib creation
CMakeLists.txt does not allow creation of static library and link applications
accordingly.
Creation of SHARED and STATIC should be allowed using -DBUILD_SHARED_LIBS
If -DBUILD_SHARED_LIBS option is not supplied, CMakeLists.txt sets it to ON.
Ex:
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=OFF ..
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=ON ..
Committer testing:
I had to fix it up a bit due to changes related to allowing building with
libbpf-devel, test results:
With the default, i.e. creating libdwarves.so and using it:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,612.69 msec task-clock:u # 1.014 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,702 page-faults:u # 91.330 K/sec ( +- 0.00% )
33,720,048,514 cycles:u # 3.970 GHz ( +- 0.38% ) (83.33%)
689,752,139 stalled-cycles-frontend:u # 2.05% frontend cycles idle ( +- 2.57% ) (83.33%)
5,070,723,369 stalled-cycles-backend:u # 15.04% backend cycles idle ( +- 1.20% ) (83.34%)
77,270,640,084 instructions:u # 2.29 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.01% ) (83.34%)
18,164,028,242 branches:u # 2.139 G/sec ( +- 0.04% ) (83.33%)
150,194,338 branch-misses:u # 0.83% of all branches ( +- 0.17% ) (83.32%)
8.4979 +- 0.0590 seconds time elapsed ( +- 0.69% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,001.79 msec task-clock:u # 1.006 CPUs utilized ( +- 1.02% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,980 page-faults:u # 104.736 K/sec ( +- 0.00% )
26,912,307,448 cycles:u # 3.872 GHz ( +- 0.31% ) (83.33%)
509,467,882 stalled-cycles-frontend:u # 1.90% frontend cycles idle ( +- 2.44% ) (83.33%)
3,602,777,263 stalled-cycles-backend:u # 13.43% backend cycles idle ( +- 1.34% ) (83.32%)
66,192,815,701 instructions:u # 2.47 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.01% ) (83.35%)
15,753,663,095 branches:u # 2.267 G/sec ( +- 0.02% ) (83.35%)
98,345,696 branch-misses:u # 0.62% of all branches ( +- 0.14% ) (83.34%)
6.9586 +- 0.0720 seconds time elapsed ( +- 1.03% )
⬢[acme@toolbox pahole]$
Then building with:
⬢[acme@toolbox pahole]$ rm -rf build ; mkdir build ; cd build ; cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. ; cd .. ; make -C build
⬢[acme@toolbox pahole]$ ldd build/pahole
linux-vdso.so.1 (0x00007ffff0172000)
libdw.so.1 => /lib64/libdw.so.1 (0x00007fdd3905d000)
libelf.so.1 => /lib64/libelf.so.1 (0x00007fdd39042000)
libz.so.1 => /lib64/libz.so.1 (0x00007fdd39028000)
libc.so.6 => /lib64/libc.so.6 (0x00007fdd38e59000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007fdd38e52000)
libzstd.so.1 => /lib64/libzstd.so.1 (0x00007fdd38d5c000)
liblzma.so.5 => /lib64/liblzma.so.5 (0x00007fdd38d2e000)
libbz2.so.1 => /lib64/libbz2.so.1 (0x00007fdd38d1b000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fdd38cfa000)
/lib64/ld-linux-x86-64.so.2 (0x00007fdd390ff000)
⬢[acme@toolbox pahole]$
We get some performance improvement:
- First is encoding detached BTF, i.e. read DWARF, generate BTF.
- Second is about loading both DWARF and BTF, producing output for both, that must match.
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,566.34 msec task-clock:u # 1.025 CPUs utilized ( +- 0.85% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,685 page-faults:u # 92.865 K/sec ( +- 0.00% )
33,333,991,512 cycles:u # 3.991 GHz ( +- 0.23% ) (83.34%)
799,187,919 stalled-cycles-frontend:u # 2.42% frontend cycles idle ( +- 1.95% ) (83.33%)
5,157,722,792 stalled-cycles-backend:u # 15.61% backend cycles idle ( +- 1.25% ) (83.33%)
76,273,972,066 instructions:u # 2.31 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.03% ) (83.33%)
17,843,388,470 branches:u # 2.136 G/sec ( +- 0.05% ) (83.33%)
150,507,690 branch-misses:u # 0.84% of all branches ( +- 0.46% ) (83.33%)
8.3561 +- 0.0729 seconds time elapsed ( +- 0.87% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
6,685.39 msec task-clock:u # 0.986 CPUs utilized ( +- 0.54% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,931 page-faults:u # 107.475 K/sec ( +- 0.00% )
26,149,371,139 cycles:u # 3.861 GHz ( +- 0.15% ) (83.36%)
525,918,808 stalled-cycles-frontend:u # 2.00% frontend cycles idle ( +- 2.96% ) (83.33%)
3,547,589,546 stalled-cycles-backend:u # 13.51% backend cycles idle ( +- 1.28% ) (83.35%)
65,389,507,702 instructions:u # 2.49 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.03% ) (83.29%)
15,553,175,605 branches:u # 2.296 G/sec ( +- 0.02% ) (83.34%)
97,812,971 branch-misses:u # 0.63% of all branches ( +- 0.23% ) (83.34%)
6.7784 +- 0.0359 seconds time elapsed ( +- 0.53% )
⬢[acme@toolbox pahole]$
Signed-off-by: Deepak Kumar Mishra <deepakkumar.mishra@arm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-07 21:20:13 +02:00
|
|
|
# The dwarves library. No library type is given, so BUILD_SHARED_LIBS decides
# between shared and static.
add_library(dwarves ${dwarves_LIB_SRCS})

# Version 1.0.0 with soname version 1. The link interface is cleared before
# the link libraries are added below.
set_target_properties(dwarves PROPERTIES
  VERSION 1.0.0
  SOVERSION 1
  INTERFACE_LINK_LIBRARIES "")

# LIBBPF_LIBRARIES is only populated when a system libbpf was found.
target_link_libraries(dwarves
  ${DWARF_LIBRARIES}
  ${ZLIB_LIBRARIES}
  ${LIBBPF_LIBRARIES})
|
2006-12-12 03:37:23 +01:00
|
|
|
|
2007-05-06 19:50:28 +02:00
|
|
|
# Source files of the 'dwarves_emit' library.
set(dwarves_emit_LIB_SRCS dwarves_emit.c)
|
CMakeLists.txt: Enable SHARED and STATIC lib creation
CMakeLists.txt does not allow creation of static library and link applications
accordingly.
Creation of SHARED and STATIC should be allowed using -DBUILD_SHARED_LIBS
If -DBUILD_SHARED_LIBS option is not supplied, CMakeLists.txt sets it to ON.
Ex:
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=OFF ..
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=ON ..
Committer testing:
I had to fix it up a bit due to changes related to allowing building with
libbpf-devel, test results:
With the default, i.e. creating libdwarves.so and using it:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,612.69 msec task-clock:u # 1.014 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,702 page-faults:u # 91.330 K/sec ( +- 0.00% )
33,720,048,514 cycles:u # 3.970 GHz ( +- 0.38% ) (83.33%)
689,752,139 stalled-cycles-frontend:u # 2.05% frontend cycles idle ( +- 2.57% ) (83.33%)
5,070,723,369 stalled-cycles-backend:u # 15.04% backend cycles idle ( +- 1.20% ) (83.34%)
77,270,640,084 instructions:u # 2.29 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.01% ) (83.34%)
18,164,028,242 branches:u # 2.139 G/sec ( +- 0.04% ) (83.33%)
150,194,338 branch-misses:u # 0.83% of all branches ( +- 0.17% ) (83.32%)
8.4979 +- 0.0590 seconds time elapsed ( +- 0.69% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,001.79 msec task-clock:u # 1.006 CPUs utilized ( +- 1.02% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,980 page-faults:u # 104.736 K/sec ( +- 0.00% )
26,912,307,448 cycles:u # 3.872 GHz ( +- 0.31% ) (83.33%)
509,467,882 stalled-cycles-frontend:u # 1.90% frontend cycles idle ( +- 2.44% ) (83.33%)
3,602,777,263 stalled-cycles-backend:u # 13.43% backend cycles idle ( +- 1.34% ) (83.32%)
66,192,815,701 instructions:u # 2.47 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.01% ) (83.35%)
15,753,663,095 branches:u # 2.267 G/sec ( +- 0.02% ) (83.35%)
98,345,696 branch-misses:u # 0.62% of all branches ( +- 0.14% ) (83.34%)
6.9586 +- 0.0720 seconds time elapsed ( +- 1.03% )
⬢[acme@toolbox pahole]$
Then building with:
⬢[acme@toolbox pahole]$ rm -rf build ; mkdir build ; cd build ; cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. ; cd .. ; make -C build
⬢[acme@toolbox pahole]$ ldd build/pahole
linux-vdso.so.1 (0x00007ffff0172000)
libdw.so.1 => /lib64/libdw.so.1 (0x00007fdd3905d000)
libelf.so.1 => /lib64/libelf.so.1 (0x00007fdd39042000)
libz.so.1 => /lib64/libz.so.1 (0x00007fdd39028000)
libc.so.6 => /lib64/libc.so.6 (0x00007fdd38e59000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007fdd38e52000)
libzstd.so.1 => /lib64/libzstd.so.1 (0x00007fdd38d5c000)
liblzma.so.5 => /lib64/liblzma.so.5 (0x00007fdd38d2e000)
libbz2.so.1 => /lib64/libbz2.so.1 (0x00007fdd38d1b000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fdd38cfa000)
/lib64/ld-linux-x86-64.so.2 (0x00007fdd390ff000)
⬢[acme@toolbox pahole]$
We get some performance improvement:
- First is encoding detached BTF, i.e. read DWARF, generate BTF.
- Second is about loading both DWARF and BTF, producing output for both, that must match.
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,566.34 msec task-clock:u # 1.025 CPUs utilized ( +- 0.85% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,685 page-faults:u # 92.865 K/sec ( +- 0.00% )
33,333,991,512 cycles:u # 3.991 GHz ( +- 0.23% ) (83.34%)
799,187,919 stalled-cycles-frontend:u # 2.42% frontend cycles idle ( +- 1.95% ) (83.33%)
5,157,722,792 stalled-cycles-backend:u # 15.61% backend cycles idle ( +- 1.25% ) (83.33%)
76,273,972,066 instructions:u # 2.31 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.03% ) (83.33%)
17,843,388,470 branches:u # 2.136 G/sec ( +- 0.05% ) (83.33%)
150,507,690 branch-misses:u # 0.84% of all branches ( +- 0.46% ) (83.33%)
8.3561 +- 0.0729 seconds time elapsed ( +- 0.87% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
6,685.39 msec task-clock:u # 0.986 CPUs utilized ( +- 0.54% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,931 page-faults:u # 107.475 K/sec ( +- 0.00% )
26,149,371,139 cycles:u # 3.861 GHz ( +- 0.15% ) (83.36%)
525,918,808 stalled-cycles-frontend:u # 2.00% frontend cycles idle ( +- 2.96% ) (83.33%)
3,547,589,546 stalled-cycles-backend:u # 13.51% backend cycles idle ( +- 1.28% ) (83.35%)
65,389,507,702 instructions:u # 2.49 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.03% ) (83.29%)
15,553,175,605 branches:u # 2.296 G/sec ( +- 0.02% ) (83.34%)
97,812,971 branch-misses:u # 0.63% of all branches ( +- 0.23% ) (83.34%)
6.7784 +- 0.0359 seconds time elapsed ( +- 0.53% )
⬢[acme@toolbox pahole]$
Signed-off-by: Deepak Kumar Mishra <deepakkumar.mishra@arm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-07 21:20:13 +02:00
|
|
|
# The dwarves_emit library: version 1.0.0, soname version 1, linked against
# the dwarves library. Library type follows BUILD_SHARED_LIBS.
add_library(dwarves_emit ${dwarves_emit_LIB_SRCS})
set_target_properties(dwarves_emit PROPERTIES
  VERSION 1.0.0
  SOVERSION 1)
target_link_libraries(dwarves_emit dwarves)
|
2007-05-06 19:50:28 +02:00
|
|
|
|
2007-05-07 05:30:02 +02:00
|
|
|
# Source files of the 'dwarves_reorganize' library.
set(dwarves_reorganize_LIB_SRCS dwarves_reorganize.c)
|
CMakeLists.txt: Enable SHARED and STATIC lib creation
CMakeLists.txt does not allow creation of static library and link applications
accordingly.
Creation of SHARED and STATIC should be allowed using -DBUILD_SHARED_LIBS
If -DBUILD_SHARED_LIBS option is not supplied, CMakeLists.txt sets it to ON.
Ex:
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=OFF ..
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=ON ..
Committer testing:
I had to fix it up a bit due to changes related to allowing building with
libbpf-devel, test results:
With the default, i.e. creating libdwarves.so and using it:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,612.69 msec task-clock:u # 1.014 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,702 page-faults:u # 91.330 K/sec ( +- 0.00% )
33,720,048,514 cycles:u # 3.970 GHz ( +- 0.38% ) (83.33%)
689,752,139 stalled-cycles-frontend:u # 2.05% frontend cycles idle ( +- 2.57% ) (83.33%)
5,070,723,369 stalled-cycles-backend:u # 15.04% backend cycles idle ( +- 1.20% ) (83.34%)
77,270,640,084 instructions:u # 2.29 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.01% ) (83.34%)
18,164,028,242 branches:u # 2.139 G/sec ( +- 0.04% ) (83.33%)
150,194,338 branch-misses:u # 0.83% of all branches ( +- 0.17% ) (83.32%)
8.4979 +- 0.0590 seconds time elapsed ( +- 0.69% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,001.79 msec task-clock:u # 1.006 CPUs utilized ( +- 1.02% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,980 page-faults:u # 104.736 K/sec ( +- 0.00% )
26,912,307,448 cycles:u # 3.872 GHz ( +- 0.31% ) (83.33%)
509,467,882 stalled-cycles-frontend:u # 1.90% frontend cycles idle ( +- 2.44% ) (83.33%)
3,602,777,263 stalled-cycles-backend:u # 13.43% backend cycles idle ( +- 1.34% ) (83.32%)
66,192,815,701 instructions:u # 2.47 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.01% ) (83.35%)
15,753,663,095 branches:u # 2.267 G/sec ( +- 0.02% ) (83.35%)
98,345,696 branch-misses:u # 0.62% of all branches ( +- 0.14% ) (83.34%)
6.9586 +- 0.0720 seconds time elapsed ( +- 1.03% )
⬢[acme@toolbox pahole]$
Then building with:
⬢[acme@toolbox pahole]$ rm -rf build ; mkdir build ; cd build ; cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. ; cd .. ; make -C build
⬢[acme@toolbox pahole]$ ldd build/pahole
linux-vdso.so.1 (0x00007ffff0172000)
libdw.so.1 => /lib64/libdw.so.1 (0x00007fdd3905d000)
libelf.so.1 => /lib64/libelf.so.1 (0x00007fdd39042000)
libz.so.1 => /lib64/libz.so.1 (0x00007fdd39028000)
libc.so.6 => /lib64/libc.so.6 (0x00007fdd38e59000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007fdd38e52000)
libzstd.so.1 => /lib64/libzstd.so.1 (0x00007fdd38d5c000)
liblzma.so.5 => /lib64/liblzma.so.5 (0x00007fdd38d2e000)
libbz2.so.1 => /lib64/libbz2.so.1 (0x00007fdd38d1b000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fdd38cfa000)
/lib64/ld-linux-x86-64.so.2 (0x00007fdd390ff000)
⬢[acme@toolbox pahole]$
We get some performance improvement:
- First is encoding detached BTF, i.e. read DWARF, generate BTF.
- Second is about loading both DWARF and BTF, producing output for both, that must match.
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,566.34 msec task-clock:u # 1.025 CPUs utilized ( +- 0.85% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,685 page-faults:u # 92.865 K/sec ( +- 0.00% )
33,333,991,512 cycles:u # 3.991 GHz ( +- 0.23% ) (83.34%)
799,187,919 stalled-cycles-frontend:u # 2.42% frontend cycles idle ( +- 1.95% ) (83.33%)
5,157,722,792 stalled-cycles-backend:u # 15.61% backend cycles idle ( +- 1.25% ) (83.33%)
76,273,972,066 instructions:u # 2.31 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.03% ) (83.33%)
17,843,388,470 branches:u # 2.136 G/sec ( +- 0.05% ) (83.33%)
150,507,690 branch-misses:u # 0.84% of all branches ( +- 0.46% ) (83.33%)
8.3561 +- 0.0729 seconds time elapsed ( +- 0.87% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
6,685.39 msec task-clock:u # 0.986 CPUs utilized ( +- 0.54% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,931 page-faults:u # 107.475 K/sec ( +- 0.00% )
26,149,371,139 cycles:u # 3.861 GHz ( +- 0.15% ) (83.36%)
525,918,808 stalled-cycles-frontend:u # 2.00% frontend cycles idle ( +- 2.96% ) (83.33%)
3,547,589,546 stalled-cycles-backend:u # 13.51% backend cycles idle ( +- 1.28% ) (83.35%)
65,389,507,702 instructions:u # 2.49 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.03% ) (83.29%)
15,553,175,605 branches:u # 2.296 G/sec ( +- 0.02% ) (83.34%)
97,812,971 branch-misses:u # 0.63% of all branches ( +- 0.23% ) (83.34%)
6.7784 +- 0.0359 seconds time elapsed ( +- 0.53% )
⬢[acme@toolbox pahole]$
Signed-off-by: Deepak Kumar Mishra <deepakkumar.mishra@arm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-07 21:20:13 +02:00
|
|
|
# Build the dwarves_reorganize library from dwarves_reorganize_LIB_SRCS.
# No STATIC/SHARED keyword is given, so the library type follows
# BUILD_SHARED_LIBS.
add_library(dwarves_reorganize ${dwarves_reorganize_LIB_SRCS})
|
2007-05-07 05:30:02 +02:00
|
|
|
# Version the library: VERSION is the build version, SOVERSION the API
# version that CMake uses when naming a shared library and its symlinks.
set_target_properties(dwarves_reorganize PROPERTIES VERSION 1.0.0 SOVERSION 1)
|
2009-12-17 18:20:27 +01:00
|
|
|
# dwarves_reorganize links against the dwarves library target.
target_link_libraries(dwarves_reorganize dwarves)
|
2007-05-07 05:30:02 +02:00
|
|
|
|
2009-03-18 15:08:23 +01:00
|
|
|
# Source list for the codiff executable.
set(codiff_SRCS codiff.c)
|
2007-02-25 20:43:59 +01:00
|
|
|
# Build the codiff executable from codiff_SRCS.
add_executable(codiff ${codiff_SRCS})
|
2007-03-28 15:18:57 +02:00
|
|
|
# codiff links against the dwarves library target.
target_link_libraries(codiff dwarves)
|
2006-12-12 03:37:23 +01:00
|
|
|
|
2009-03-18 15:08:23 +01:00
|
|
|
# Source list for the ctracer executable.
set(ctracer_SRCS ctracer.c)
|
2007-02-25 20:43:59 +01:00
|
|
|
# Build the ctracer executable from ctracer_SRCS.
add_executable(ctracer ${ctracer_SRCS})
|
2010-11-20 18:18:01 +01:00
|
|
|
# ctracer links against the dwarves, dwarves_emit and dwarves_reorganize
# targets plus whatever ELF_LIBRARY expands to (set outside this section).
target_link_libraries(ctracer dwarves dwarves_emit dwarves_reorganize ${ELF_LIBRARY})
|
2006-12-20 15:16:16 +01:00
|
|
|
|
2007-02-25 20:43:59 +01:00
|
|
|
# Source list for the dtagnames executable.
set(dtagnames_SRCS dtagnames.c)
|
|
|
|
# Build the dtagnames executable from dtagnames_SRCS.
add_executable(dtagnames ${dtagnames_SRCS})
|
2007-03-28 15:18:57 +02:00
|
|
|
# dtagnames links against the dwarves library target.
target_link_libraries(dtagnames dwarves)
|
2006-12-28 17:03:42 +01:00
|
|
|
|
2007-02-25 20:43:59 +01:00
|
|
|
# Source list for the pahole executable.
set(pahole_SRCS pahole.c)
|
|
|
|
# Build the pahole executable from pahole_SRCS.
add_executable(pahole ${pahole_SRCS})
|
2007-05-07 05:30:02 +02:00
|
|
|
# pahole links against the dwarves and dwarves_reorganize library targets.
target_link_libraries(pahole dwarves dwarves_reorganize)
|
2006-12-12 03:37:23 +01:00
|
|
|
|
2007-02-25 20:43:59 +01:00
|
|
|
# Source list for the pdwtags executable.
set(pdwtags_SRCS pdwtags.c)
|
|
|
|
# Build the pdwtags executable from pdwtags_SRCS.
add_executable(pdwtags ${pdwtags_SRCS})
|
2007-03-28 15:18:57 +02:00
|
|
|
# pdwtags links against the dwarves library target.
target_link_libraries(pdwtags dwarves)
|
2007-01-19 00:13:56 +01:00
|
|
|
|
2007-02-25 20:43:59 +01:00
|
|
|
# Source list for the pglobal executable.
set(pglobal_SRCS pglobal.c)
|
|
|
|
# Build the pglobal executable from pglobal_SRCS.
add_executable(pglobal ${pglobal_SRCS})
|
2007-03-28 15:18:57 +02:00
|
|
|
# pglobal links against the dwarves library target.
target_link_libraries(pglobal dwarves)
|
2007-02-02 14:56:53 +01:00
|
|
|
|
2019-02-07 20:00:36 +01:00
|
|
|
# Source list for the pfunct executable.
set(pfunct_SRCS pfunct.c)
|
2007-02-25 20:43:59 +01:00
|
|
|
# Build the pfunct executable from pfunct_SRCS.
add_executable(pfunct ${pfunct_SRCS})
|
2010-11-20 18:18:01 +01:00
|
|
|
# pfunct links against the dwarves and dwarves_emit targets plus whatever
# ELF_LIBRARY expands to (set outside this section).
target_link_libraries(pfunct dwarves dwarves_emit ${ELF_LIBRARY})
|
2006-12-12 03:37:23 +01:00
|
|
|
|
2007-02-25 20:43:59 +01:00
|
|
|
# Source list for the prefcnt executable.
set(prefcnt_SRCS prefcnt.c)
|
|
|
|
# Build the prefcnt executable from prefcnt_SRCS.
add_executable(prefcnt ${prefcnt_SRCS})
|
2007-03-28 15:18:57 +02:00
|
|
|
# prefcnt links against the dwarves library target.
target_link_libraries(prefcnt dwarves)
|
2007-01-11 19:41:54 +01:00
|
|
|
|
2010-01-11 21:50:31 +01:00
|
|
|
# Source list for the scncopy executable; unlike the other tools it compiles
# elfcreator.c directly into the binary alongside scncopy.c.
set(scncopy_SRCS scncopy.c elfcreator.c)
|
|
|
|
# Build the scncopy executable from scncopy_SRCS.
add_executable(scncopy ${scncopy_SRCS})
|
2010-11-20 18:18:01 +01:00
|
|
|
# scncopy links against the dwarves target plus whatever ELF_LIBRARY expands
# to (set outside this section).
target_link_libraries(scncopy dwarves ${ELF_LIBRARY})
|
2010-01-11 21:50:31 +01:00
|
|
|
|
2007-06-12 20:53:46 +02:00
|
|
|
# Source list for the syscse executable.
set(syscse_SRCS syscse.c)
|
|
|
|
# Build the syscse executable from syscse_SRCS.
add_executable(syscse ${syscse_SRCS})
|
|
|
|
# syscse links against the dwarves library target.
target_link_libraries(syscse dwarves)
|
|
|
|
|
2007-12-28 02:00:07 +01:00
|
|
|
# Install all command-line tool executables into ${CMAKE_INSTALL_PREFIX}/bin.
install(TARGETS codiff ctracer dtagnames pahole pdwtags
|
2010-01-11 21:50:31 +01:00
|
|
|
pfunct pglobal prefcnt scncopy syscse RUNTIME DESTINATION
|
2007-04-19 23:01:47 +02:00
|
|
|
${CMAKE_INSTALL_PREFIX}/bin)
|
CMakeLists.txt: Enable SHARED and STATIC lib creation
CMakeLists.txt does not allow creating a static library and linking applications
accordingly.
Creation of SHARED and STATIC libraries should be allowed using -DBUILD_SHARED_LIBS.
If the -DBUILD_SHARED_LIBS option is not supplied, CMakeLists.txt sets it to ON.
Ex:
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=OFF ..
$ cmake -D__LIB=lib -DBUILD_SHARED_LIBS=ON ..
Committer testing:
I had to fix it up a bit due to changes related to allowing building with
libbpf-devel, test results:
With the default, i.e. creating libdwarves.so and using it:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,612.69 msec task-clock:u # 1.014 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,702 page-faults:u # 91.330 K/sec ( +- 0.00% )
33,720,048,514 cycles:u # 3.970 GHz ( +- 0.38% ) (83.33%)
689,752,139 stalled-cycles-frontend:u # 2.05% frontend cycles idle ( +- 2.57% ) (83.33%)
5,070,723,369 stalled-cycles-backend:u # 15.04% backend cycles idle ( +- 1.20% ) (83.34%)
77,270,640,084 instructions:u # 2.29 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.01% ) (83.34%)
18,164,028,242 branches:u # 2.139 G/sec ( +- 0.04% ) (83.33%)
150,194,338 branch-misses:u # 0.83% of all branches ( +- 0.17% ) (83.32%)
8.4979 +- 0.0590 seconds time elapsed ( +- 0.69% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,001.79 msec task-clock:u # 1.006 CPUs utilized ( +- 1.02% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,980 page-faults:u # 104.736 K/sec ( +- 0.00% )
26,912,307,448 cycles:u # 3.872 GHz ( +- 0.31% ) (83.33%)
509,467,882 stalled-cycles-frontend:u # 1.90% frontend cycles idle ( +- 2.44% ) (83.33%)
3,602,777,263 stalled-cycles-backend:u # 13.43% backend cycles idle ( +- 1.34% ) (83.32%)
66,192,815,701 instructions:u # 2.47 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.01% ) (83.35%)
15,753,663,095 branches:u # 2.267 G/sec ( +- 0.02% ) (83.35%)
98,345,696 branch-misses:u # 0.62% of all branches ( +- 0.14% ) (83.34%)
6.9586 +- 0.0720 seconds time elapsed ( +- 1.03% )
⬢[acme@toolbox pahole]$
Then building with:
⬢[acme@toolbox pahole]$ rm -rf build ; mkdir build ; cd build ; cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release .. ; cd .. ; make -C build
⬢[acme@toolbox pahole]$ ldd build/pahole
linux-vdso.so.1 (0x00007ffff0172000)
libdw.so.1 => /lib64/libdw.so.1 (0x00007fdd3905d000)
libelf.so.1 => /lib64/libelf.so.1 (0x00007fdd39042000)
libz.so.1 => /lib64/libz.so.1 (0x00007fdd39028000)
libc.so.6 => /lib64/libc.so.6 (0x00007fdd38e59000)
libdl.so.2 => /lib64/libdl.so.2 (0x00007fdd38e52000)
libzstd.so.1 => /lib64/libzstd.so.1 (0x00007fdd38d5c000)
liblzma.so.5 => /lib64/liblzma.so.5 (0x00007fdd38d2e000)
libbz2.so.1 => /lib64/libbz2.so.1 (0x00007fdd38d1b000)
libpthread.so.0 => /lib64/libpthread.so.0 (0x00007fdd38cfa000)
/lib64/ld-linux-x86-64.so.2 (0x00007fdd390ff000)
⬢[acme@toolbox pahole]$
We get some performance improvement:
- First is encoding detached BTF, i.e. read DWARF, generate BTF.
- Second is about loading both DWARF and BTF, producing output for both, that must match.
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,566.34 msec task-clock:u # 1.025 CPUs utilized ( +- 0.85% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,685 page-faults:u # 92.865 K/sec ( +- 0.00% )
33,333,991,512 cycles:u # 3.991 GHz ( +- 0.23% ) (83.34%)
799,187,919 stalled-cycles-frontend:u # 2.42% frontend cycles idle ( +- 1.95% ) (83.33%)
5,157,722,792 stalled-cycles-backend:u # 15.61% backend cycles idle ( +- 1.25% ) (83.33%)
76,273,972,066 instructions:u # 2.31 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.03% ) (83.33%)
17,843,388,470 branches:u # 2.136 G/sec ( +- 0.05% ) (83.33%)
150,507,690 branch-misses:u # 0.84% of all branches ( +- 0.46% ) (83.33%)
8.3561 +- 0.0729 seconds time elapsed ( +- 0.87% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
6,685.39 msec task-clock:u # 0.986 CPUs utilized ( +- 0.54% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,931 page-faults:u # 107.475 K/sec ( +- 0.00% )
26,149,371,139 cycles:u # 3.861 GHz ( +- 0.15% ) (83.36%)
525,918,808 stalled-cycles-frontend:u # 2.00% frontend cycles idle ( +- 2.96% ) (83.33%)
3,547,589,546 stalled-cycles-backend:u # 13.51% backend cycles idle ( +- 1.28% ) (83.35%)
65,389,507,702 instructions:u # 2.49 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.03% ) (83.29%)
15,553,175,605 branches:u # 2.296 G/sec ( +- 0.02% ) (83.34%)
97,812,971 branch-misses:u # 0.63% of all branches ( +- 0.23% ) (83.34%)
6.7784 +- 0.0359 seconds time elapsed ( +- 0.53% )
⬢[acme@toolbox pahole]$
Signed-off-by: Deepak Kumar Mishra <deepakkumar.mishra@arm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Qais Yousef <qais.yousef@arm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-07 21:20:13 +02:00
|
|
|
# Install the dwarves library into LIB_INSTALL_DIR. LIBRARY covers the
# shared-object build and ARCHIVE the static build, so the rule works for
# either BUILD_SHARED_LIBS setting.
install(TARGETS dwarves LIBRARY DESTINATION ${LIB_INSTALL_DIR} ARCHIVE DESTINATION ${LIB_INSTALL_DIR})
|
|
|
|
# Install the companion libraries into LIB_INSTALL_DIR (LIBRARY for shared
# builds, ARCHIVE for static builds). The dwarves target itself is already
# installed by the preceding install(TARGETS dwarves ...) rule, so it is not
# listed again here; repeating it only produced a duplicate install rule.
install(TARGETS dwarves_emit dwarves_reorganize LIBRARY DESTINATION ${LIB_INSTALL_DIR} ARCHIVE DESTINATION ${LIB_INSTALL_DIR})
|
2009-04-23 20:08:10 +02:00
|
|
|
# Install the public headers into ${CMAKE_INSTALL_PREFIX}/include/dwarves/.
install(FILES dwarves.h dwarves_emit.h dwarves_reorganize.h
|
2020-10-20 21:30:04 +02:00
|
|
|
dutil.h gobuffer.h list.h rbtree.h pahole_strings.h
|
2019-04-23 16:25:16 +02:00
|
|
|
btf_encoder.h config.h ctf_encoder.h ctf.h
|
btf_encoder: Move libbtf.c to btf_encoder.c, the only user of its functions
All those functions now operate on a 'struct btf_encoder' object, there
is no need to make them visible outside the btf_encoder.c source file,
so move them all there and make them static.
This leads to some savings as the compiler is free to optimize further,
inlining stuff used in just one place, etc:
Before, for encoding then reading we have:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,546.56 msec task-clock:u # 0.989 CPUs utilized ( +- 0.71% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,699 page-faults:u # 89.802 K/sec ( +- 0.00% )
34,082,471,148 cycles:u # 3.946 GHz ( +- 0.22% ) (83.33%)
636,039,662 stalled-cycles-frontend:u # 1.87% frontend cycles idle ( +- 1.69% ) (83.33%)
4,895,524,778 stalled-cycles-backend:u # 14.38% backend cycles idle ( +- 2.10% ) (83.33%)
77,379,632,646 instructions:u # 2.27 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.04% ) (83.33%)
18,185,560,802 branches:u # 2.105 G/sec ( +- 0.03% ) (83.34%)
149,715,849 branch-misses:u # 0.82% of all branches ( +- 0.15% ) (83.34%)
8.6412 +- 0.0612 seconds time elapsed ( +- 0.71% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,168.97 msec task-clock:u # 1.016 CPUs utilized ( +- 0.50% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,965 page-faults:u # 103.257 K/sec ( +- 0.00% )
27,339,019,686 cycles:u # 3.878 GHz ( +- 0.17% ) (83.28%)
511,689,773 stalled-cycles-frontend:u # 1.88% frontend cycles idle ( +- 1.84% ) (83.34%)
3,677,090,126 stalled-cycles-backend:u # 13.53% backend cycles idle ( +- 1.47% ) (83.35%)
66,182,032,226 instructions:u # 2.44 insn per cycle
# 0.06 stalled cycles per insn ( +- 0.02% ) (83.35%)
15,747,149,247 branches:u # 2.234 G/sec ( +- 0.02% ) (83.36%)
98,013,024 branch-misses:u # 0.62% of all branches ( +- 0.21% ) (83.33%)
7.0554 +- 0.0357 seconds time elapsed ( +- 0.51% )
⬢[acme@toolbox pahole]$
Then, with this patch:
⬢[acme@toolbox pahole]$ rm -f vmlinux.btf ; perf stat -r5 pahole -j vmlinux.btf vmlinux && perf stat -r5 btfdiff vmlinux vmlinux.btf
Performance counter stats for 'pahole -j vmlinux.btf vmlinux' (5 runs):
8,280.48 msec task-clock:u # 0.975 CPUs utilized ( +- 0.72% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
775,699 page-faults:u # 91.481 K/sec ( +- 0.00% )
33,265,078,702 cycles:u # 3.923 GHz ( +- 0.32% ) (83.32%)
725,690,346 stalled-cycles-frontend:u # 2.16% frontend cycles idle ( +- 1.76% ) (83.34%)
4,803,211,469 stalled-cycles-backend:u # 14.33% backend cycles idle ( +- 2.43% ) (83.34%)
77,162,277,929 instructions:u # 2.30 insn per cycle
# 0.07 stalled cycles per insn ( +- 0.06% ) (83.34%)
18,139,715,894 branches:u # 2.139 G/sec ( +- 0.03% ) (83.34%)
149,609,552 branch-misses:u # 0.82% of all branches ( +- 0.16% ) (83.33%)
8.4921 +- 0.0630 seconds time elapsed ( +- 0.74% )
Performance counter stats for 'btfdiff vmlinux vmlinux.btf' (5 runs):
7,018.11 msec task-clock:u # 1.013 CPUs utilized ( +- 0.68% )
0 context-switches:u # 0.000 /sec
0 cpu-migrations:u # 0.000 /sec
727,949 page-faults:u # 105.207 K/sec ( +- 0.00% )
26,632,191,985 cycles:u # 3.849 GHz ( +- 0.31% ) (83.35%)
496,648,058 stalled-cycles-frontend:u # 1.87% frontend cycles idle ( +- 2.02% ) (83.29%)
3,437,243,040 stalled-cycles-backend:u # 12.92% backend cycles idle ( +- 0.90% ) (83.33%)
66,192,034,237 instructions:u # 2.49 insn per cycle
# 0.05 stalled cycles per insn ( +- 0.03% ) (83.34%)
15,750,883,004 branches:u # 2.276 G/sec ( +- 0.03% ) (83.35%)
97,544,298 branch-misses:u # 0.62% of all branches ( +- 0.12% ) (83.36%)
6.9247 +- 0.0478 seconds time elapsed ( +- 0.69% )
⬢[acme@toolbox pahole]$
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2021-06-09 16:04:09 +02:00
|
|
|
elfcreator.h elf_symtab.h hash.h libctf.h
|
2009-04-23 20:08:10 +02:00
|
|
|
DESTINATION ${CMAKE_INSTALL_PREFIX}/include/dwarves/)
|
2009-02-11 15:19:46 +01:00
|
|
|
# Install the pahole manual page into section 1 of the man hierarchy.
install(FILES man-pages/pahole.1 DESTINATION ${CMAKE_INSTALL_PREFIX}/share/man/man1/)
|
2007-08-14 16:33:54 +02:00
|
|
|
# Install the ostra-cg script next to the tools; PROGRAMS (rather than FILES)
# makes CMake install it with executable permissions.
install(PROGRAMS ostra/ostra-cg DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
|
2019-04-16 21:13:19 +02:00
|
|
|
# Install the btfdiff and fullcircle scripts as executables in bin.
install(PROGRAMS btfdiff fullcircle DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
|
2007-12-03 17:43:49 +01:00
|
|
|
# Install the ostra Python module into the dwarves runtime data directory.
install(FILES ostra/python/ostra.py DESTINATION ${CMAKE_INSTALL_PREFIX}/share/dwarves/runtime/python)
|
2007-12-06 20:43:20 +01:00
|
|
|
# Install the ctracer runtime support files (Makefile, relay sources and the
# blacklist) into ${CMAKE_INSTALL_PREFIX}/share/dwarves/runtime.
install(FILES lib/Makefile lib/ctracer_relay.c lib/ctracer_relay.h lib/linux.blacklist.cu
|
2007-12-03 17:43:49 +01:00
|
|
|
DESTINATION ${CMAKE_INSTALL_PREFIX}/share/dwarves/runtime)
|