Skip to content

Commit

Permalink
feat (I/O recording): Add POSIX I/O recording
Browse files Browse the repository at this point in the history
This commit adds a "--posix-io" option to record the I/O activity of
applications that use read()/write()/open[at]() etc.

Because POSIX I/O operations are only well-ordered when viewed from the
angle of the thread executing them, POSIX I/O measurement is only
available in process monitoring mode.

This is the first lo2s component which utilizes the in-kernel virtual
machine BPF for the recording of
events. A classical tracepoint based measurement process is not possible
for recording POSIX I/O as the filename, which is critical for
identifying what files are exactly written, is only ever passed as a
pointer to user-space memory in an application, which is not lo2s. We can normally
not read this pointer in lo2s, rendering it pretty much useless.

Using a small in-kernel BPF program, we can access the memory and copy
the name to lo2s accessible memory.

The changes introduced here result in a few new/changed major
components:

- CMake code to find and include libbpf as well as bpftool. This code
  conditionally decides whether POSIX I/O support is enabled.
- A BPF program, posix_io.bpf.c, which attaches itself to the necessary
  probe interfaces, performs the required in-kernel transformations
  of the events and writes them to a shared ring buffer.
    - The object code of this BPF program is included by the BPF toolkit
      into a "posix_io.bpf.o" object file, which also contains the
      necessary extra code to successfully load it into the kernel.
      This object file is linked into lo2s.
- A PosixMonitor, residing at the other end of the BPF ring buffer, writing
  OTF-2 events for the incoming BPF events.
- The necessary Trace support infrastructure to create a POSIX I/O
  writer as well as the required OTF-2 definitions.
  • Loading branch information
cvonelm committed Feb 26, 2025
1 parent 003ec6c commit c670be1
Show file tree
Hide file tree
Showing 18 changed files with 1,166 additions and 4 deletions.
21 changes: 20 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ IfUpdatedUnsetAll(lo2s_USE_STATIC_LIBS
Radare_USE_STATIC_LIBS
Sensors_USE_STATIC_LIBS
Veosinfo_USE_STATIC_LIBS
Audit_USE_STATIC_LIBS)
Audit_USE_STATIC_LIBS
LibBpf_USE_STATIC_LIBS)

if(lo2s_USE_STATIC_LIBS STREQUAL "OFF")
set(Dl_USE_STATIC_LIBS OFF CACHE BOOL "")
Expand All @@ -53,6 +54,7 @@ if(lo2s_USE_STATIC_LIBS STREQUAL "OFF")
set(Veosinfo_USE_STATIC_LIBS OFF CACHE BOOL "")
set(Radare_USE_STATIC_LIBS OFF CACHE BOOL "")
set(Audit_USE_STATIC_LIBS OFF CACHE BOOL "")
set(LibBpf_USE_STATIC_LIBS OFF CACHE BOOL "")
else()
if(lo2s_USE_STATIC_LIBS STREQUAL "MOSTLY")
set(Dl_USE_STATIC_LIBS OFF CACHE BOOL "")
Expand All @@ -78,6 +80,7 @@ else()
set(Veosinfo_USE_STATIC_LIBS ON CACHE BOOL "")
set(Radare_USE_STATIC_LIBS ON CACHE BOOL "")
set(Audit_USE_STATIC_LIBS ON CACHE BOOL "")
set(LibBpf_USE_STATIC_LIBS ON CACHE BOOL "")
endif()

# Check if we are running Linux
Expand Down Expand Up @@ -113,6 +116,7 @@ find_package(Libpfm)
find_package(CUDAToolkit)
find_package(Radare)
find_package(Audit)
find_package(BpfObject)


# configurable options
Expand All @@ -132,6 +136,9 @@ CMAKE_DEPENDENT_OPTION(USE_VEOSINFO "Use libveosinfo to sample NEC SX-Aurora Tsu
add_feature_info("USE_VEOSINFO" USE_VEOSINFO "Use libveosinfo to sample NEC SX-Aurora Tsubasa cards.")
CMAKE_DEPENDENT_OPTION(USE_CUPTI "Use CUPTI to record CUDA activity." ON "CUDAToolkit_FOUND" OFF)
add_feature_info("USE_CUPTI" USE_CUPTI "Use CUPTI to record CUDA activity.")
CMAKE_DEPENDENT_OPTION(USE_BPF "Use BPF to record POSIX I/O activity" ON BpfObject_FOUND OFF)
add_feature_info("USE_BPF" USE_BPF "Use BPF to record POSIX I/O activity")

# system configuration checks
CHECK_INCLUDE_FILES(linux/hw_breakpoint.h HAVE_HW_BREAKPOINT_H)
CHECK_STRUCT_HAS_MEMBER("struct perf_event_attr" clockid linux/perf_event.h HAVE_PERF_EVENT_ATTR_CLOCKID)
Expand Down Expand Up @@ -419,6 +426,18 @@ target_include_directories(lo2s PRIVATE
${CMAKE_CURRENT_BINARY_DIR}/include
)


# POSIX I/O recording via BPF. Requesting the feature without a usable
# BpfObject package is reported as a configuration error.
if(USE_BPF AND NOT BpfObject_FOUND)
    message(SEND_ERROR "BPF not found but requested.")
elseif(USE_BPF)
    # bpf_object() provides the posix_io_skel interface library for the
    # posix_io BPF program; lo2s is built after it and links against it.
    bpf_object(posix_io src/perf/posix_io/posix_io.bpf.c)
    target_compile_definitions(lo2s PUBLIC HAVE_BPF)
    add_dependencies(lo2s posix_io_skel)
    target_link_libraries(lo2s PRIVATE posix_io_skel)
endif()

add_subdirectory(man)

message(STATUS "Linux kernel version: ${LINUX_VERSION}")
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ In both modes, system-level metrics (e.g. tracepoints), are always grouped by th
* libaudit to resolve syscall names, otherwise only syscall nrs can be used in syscall tracing
* [pod2man](https://www.eyrie.org/~eagle/software/podlators/) to generate the man pages (typically distributed as part of `perl`)
* `gzip` to compress the man pages
* libbpf and bpftool to enable POSIX I/O recording

# Runtime Requirements

Expand Down
187 changes: 187 additions & 0 deletions cmake/FindBpfObject.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

#[=======================================================================[.rst:
FindBpfObject
-------------
Find BpfObject
This module finds if all the dependencies for eBPF Compile-Once-Run-Everywhere
programs are available and where all the components are located.
The caller may set the following variables to disable automatic
search/processing for the associated component:
``BPFOBJECT_BPFTOOL_EXE``
Path to ``bpftool`` binary
``BPFOBJECT_CLANG_EXE``
Path to ``clang`` binary
``LIBBPF_INCLUDE_DIRS``
Path to ``libbpf`` development headers
``LIBBPF_LIBRARIES``
Path to ``libbpf`` library
``BPFOBJECT_VMLINUX_H``
Path to ``vmlinux.h`` generated by ``bpftool``. If unset, this module will
attempt to automatically generate a copy.
This module sets the following result variables:
::
BpfObject_FOUND = TRUE if all components are found
This module also provides the ``bpf_object()`` macro. This macro generates a
cmake interface library for the BPF object's generated skeleton as well
as the associated dependencies.
.. code-block:: cmake
bpf_object(<name> <source>)
Given an abstract ``<name>`` for a BPF object and the associated ``<source>``
file, generates an interface library target, ``<name>_skel``, that may be
linked against by other cmake targets.
Example Usage:
::
find_package(BpfObject REQUIRED)
bpf_object(myobject myobject.bpf.c)
add_executable(myapp myapp.c)
target_link_libraries(myapp myobject_skel)
#]=======================================================================]

# Locate bpftool unless the caller already supplied BPFOBJECT_BPFTOOL_EXE.
if(NOT BPFOBJECT_BPFTOOL_EXE)
    find_program(BPFOBJECT_BPFTOOL_EXE NAMES bpftool DOC "Path to bpftool executable")
endif()

# Locate clang unless the caller already supplied BPFOBJECT_CLANG_EXE, and make
# sure the discovered compiler is recent enough for BPF CO-RE.
#
# A missing, broken or outdated clang does not abort configuration here: the
# cache entry is dropped instead, so find_package_handle_standard_args() reports
# BpfObject as not found (and raises the fatal error itself when the package
# was requested as REQUIRED).
if(NOT BPFOBJECT_CLANG_EXE)
    find_program(BPFOBJECT_CLANG_EXE NAMES clang DOC "Path to clang executable")

    # Only query the version when find_program() actually found something.
    if(BPFOBJECT_CLANG_EXE)
        execute_process(COMMAND "${BPFOBJECT_CLANG_EXE}" --version
            OUTPUT_VARIABLE CLANG_version_output
            ERROR_VARIABLE CLANG_version_error
            RESULT_VARIABLE CLANG_version_result
            OUTPUT_STRIP_TRAILING_WHITESPACE)

        set(CLANG_problem "")
        if("${CLANG_version_result}" STREQUAL "0")
            # Group 1 is the full version text, group 2 the major version.
            if("${CLANG_version_output}" MATCHES "clang version (([0-9]+)[^\n]*)")
                set(CLANG_VERSION "${CMAKE_MATCH_1}")
                set(CLANG_VERSION_MAJOR "${CMAKE_MATCH_2}")

                # Anything older than clang 10 doesn't really work.
                # LESS compares numerically; a string comparison would rank
                # "9" above "10" and let old compilers through.
                if(CLANG_VERSION_MAJOR LESS 10)
                    set(CLANG_problem "clang ${CLANG_VERSION} is too old for BPF CO-RE")
                else()
                    message(STATUS "Found clang version: ${CLANG_VERSION}")
                endif()
            else()
                set(CLANG_problem "Failed to parse clang version string: ${CLANG_version_output}")
            endif()
        else()
            set(CLANG_problem "Command \"${BPFOBJECT_CLANG_EXE} --version\" failed with output:\n${CLANG_version_error}")
        endif()

        if(NOT "${CLANG_problem}" STREQUAL "")
            if(NOT BpfObject_FIND_QUIETLY)
                message(WARNING "${CLANG_problem}")
            endif()
            # Forget the unusable compiler so the package is reported as not found
            # and the search is repeated on the next configure run.
            unset(BPFOBJECT_CLANG_EXE CACHE)
        endif()
        unset(CLANG_problem)
    endif()
endif()

find_package(LibBpf)

# Determine the directory that holds vmlinux.h. A caller-supplied
# BPFOBJECT_VMLINUX_H is used as-is; otherwise the header is dumped from the
# running kernel's BTF information with bpftool.
if(BPFOBJECT_VMLINUX_H)
    get_filename_component(GENERATED_VMLINUX_DIR "${BPFOBJECT_VMLINUX_H}" DIRECTORY)
elseif(BPFOBJECT_BPFTOOL_EXE)
    # Generate vmlinux.h
    set(GENERATED_VMLINUX_DIR "${CMAKE_CURRENT_BINARY_DIR}/include/lo2s")
    set(BPFOBJECT_VMLINUX_H "${GENERATED_VMLINUX_DIR}/vmlinux.h")

    file(MAKE_DIRECTORY "${GENERATED_VMLINUX_DIR}")
    execute_process(COMMAND "${BPFOBJECT_BPFTOOL_EXE}" btf dump file /sys/kernel/btf/vmlinux format c
        OUTPUT_FILE "${BPFOBJECT_VMLINUX_H}"
        ERROR_VARIABLE VMLINUX_error
        RESULT_VARIABLE VMLINUX_result)
    # The result is quoted because it is an error string, not a number, when
    # the process could not be started at all.
    if("${VMLINUX_result}" STREQUAL "0")
        set(VMLINUX "${BPFOBJECT_VMLINUX_H}")
    else()
        # No usable BTF dump: do not abort configuration. Clearing
        # GENERATED_VMLINUX_DIR makes find_package_handle_standard_args() report
        # the package as not found (fatal only when it was REQUIRED).
        if(NOT BpfObject_FIND_QUIETLY)
            message(WARNING "Failed to dump vmlinux.h from BTF: ${VMLINUX_error}")
        endif()
        # Remove the empty/partial header left behind by the failed dump.
        file(REMOVE "${BPFOBJECT_VMLINUX_H}")
        unset(BPFOBJECT_VMLINUX_H)
        unset(GENERATED_VMLINUX_DIR)
    endif()
endif()

# Report the overall result. LibBpf_FOUND is required in addition to the
# individual libbpf variables: LIBBPF_LIBRARIES alone can be set while libelf or
# libz are missing, in which case the LibBpf::LibBpf target that bpf_object()
# links against does not exist.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(BpfObject
    REQUIRED_VARS
        BPFOBJECT_BPFTOOL_EXE
        BPFOBJECT_CLANG_EXE
        LIBBPF_INCLUDE_DIRS
        LIBBPF_LIBRARIES
        LibBpf_FOUND
        GENERATED_VMLINUX_DIR)

# Get clang bpf system includes
execute_process(
COMMAND bash -c "${BPFOBJECT_CLANG_EXE} -v -E - < /dev/null 2>&1 |
sed -n '/<...> search starts here:/,/End of search list./{ s| \\(/.*\\)|-idirafter \\1|p }'"
OUTPUT_VARIABLE CLANG_SYSTEM_INCLUDES_output
ERROR_VARIABLE CLANG_SYSTEM_INCLUDES_error
RESULT_VARIABLE CLANG_SYSTEM_INCLUDES_result
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(${CLANG_SYSTEM_INCLUDES_result} EQUAL 0)
separate_arguments(CLANG_SYSTEM_INCLUDES UNIX_COMMAND ${CLANG_SYSTEM_INCLUDES_output})
message(STATUS "BPF system include flags: ${CLANG_SYSTEM_INCLUDES}")
else()
message(FATAL_ERROR "Failed to determine BPF system includes: ${CLANG_SYSTEM_INCLUDES_error}")
endif()

# Get target arch
# Pipe `uname -m` through sed to rewrite the machine name (x86_64 -> x86,
# aarch64 -> arm64, ppc64le -> powerpc, mips* -> mips). The result is stored
# in ARCH, which bpf_object() uses for its -D__TARGET_ARCH_<arch> define.
execute_process(
    COMMAND uname -m
    COMMAND sed -e "s/x86_64/x86/" -e "s/aarch64/arm64/" -e "s/ppc64le/powerpc/" -e "s/mips.*/mips/"
    RESULT_VARIABLE ARCH_result
    OUTPUT_VARIABLE ARCH_output
    ERROR_VARIABLE ARCH_error
    OUTPUT_STRIP_TRAILING_WHITESPACE)

# Bail out early on failure; FATAL_ERROR stops processing right here.
if(NOT ${ARCH_result} EQUAL 0)
    message(FATAL_ERROR "Failed to determine target architecture: ${ARCH_error}")
endif()

set(ARCH ${ARCH_output})
message(STATUS "BPF target arch: ${ARCH}")

# Public macro
# bpf_object(<name> <source>)
#
# Compiles the BPF program <source> (relative to the calling directory) into
# <name>.bpf.o, generates the bpftool skeleton header
# include/lo2s/<name>.skel.h in the current binary directory, and defines the
# interface library <name>_skel that carries the skeleton header, the include
# directories and the LibBpf::LibBpf link dependency.
#
# This is a macro, so the BPF_* and OUTPUT_TARGET helper variables set below
# are visible in the caller's scope after the call.
macro(bpf_object name input)
    set(BPF_C_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${input}")
    set(BPF_O_FILE "${CMAKE_CURRENT_BINARY_DIR}/${name}.bpf.o")
    set(BPF_SKEL_DIR "${CMAKE_CURRENT_BINARY_DIR}/include/lo2s")
    set(BPF_SKEL_FILE "${BPF_SKEL_DIR}/${name}.skel.h")
    set(OUTPUT_TARGET ${name}_skel)

    # Build BPF object file
    # PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR) keeps the include path correct
    # when this project is built as a subproject of another build.
    add_custom_command(OUTPUT ${BPF_O_FILE}
        COMMAND ${BPFOBJECT_CLANG_EXE} -g -O2 -target bpf -D__TARGET_ARCH_${ARCH}
                ${CLANG_SYSTEM_INCLUDES} -I${GENERATED_VMLINUX_DIR}
                -I${PROJECT_SOURCE_DIR}/include
                -isystem ${LIBBPF_INCLUDE_DIRS} -c ${BPF_C_FILE} -o ${BPF_O_FILE}
        COMMAND_EXPAND_LISTS
        VERBATIM
        DEPENDS ${BPF_C_FILE}
        COMMENT "[clang] Building BPF object: ${name}")

    # Build BPF skeleton header
    # The output directory is created first: nothing else guarantees that it
    # exists when vmlinux.h is supplied by the user instead of being generated.
    # bpftool writes the skeleton to stdout, hence the shell redirection; the
    # paths are single-quoted so directories containing spaces keep working.
    add_custom_command(OUTPUT ${BPF_SKEL_FILE}
        COMMAND ${CMAKE_COMMAND} -E make_directory ${BPF_SKEL_DIR}
        COMMAND bash -c "'${BPFOBJECT_BPFTOOL_EXE}' gen skeleton '${BPF_O_FILE}' > '${BPF_SKEL_FILE}'"
        VERBATIM
        DEPENDS ${BPF_O_FILE}
        COMMENT "[skel] Building BPF skeleton: ${name}")

    # Listing the skeleton as an INTERFACE source makes every consumer of the
    # target depend on the generated header.
    add_library(${OUTPUT_TARGET} INTERFACE)
    target_sources(${OUTPUT_TARGET} INTERFACE ${BPF_SKEL_FILE})
    target_include_directories(${OUTPUT_TARGET} INTERFACE ${CMAKE_CURRENT_BINARY_DIR})
    target_include_directories(${OUTPUT_TARGET} SYSTEM INTERFACE ${LIBBPF_INCLUDE_DIRS})
    target_link_libraries(${OUTPUT_TARGET} INTERFACE LibBpf::LibBpf)
endmacro()
98 changes: 98 additions & 0 deletions cmake/FindLibBpf.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
# FindLibBpf
#
# Locates libbpf together with the libelf and libz libraries that are linked
# alongside it.
#
# Input:
#   LibBpf_USE_STATIC_LIBS  - search for the static (.a) instead of the shared
#                             (.so) variants of all three libraries
# Result:
#   LibBpf_FOUND            - TRUE if headers and all three libraries were found
#   LIBBPF_INCLUDE_DIRS     - directory containing the bpf/ headers
#   LIBBPF_LIBRARIES        - libbpf followed by libelf and libz when found,
#                             LIBBPF_LIBRARIES-NOTFOUND otherwise
#   LibBpf::LibBpf          - interface target carrying the above (only if found)
option(LibBpf_USE_STATIC_LIBS "Link LibBpf statically." ON)

find_path(LIBBPF_INCLUDE_DIRS
    NAMES
        bpf/bpf.h
        bpf/btf.h
        bpf/libbpf.h
    PATHS
        /usr/include
        /usr/local/include
        /opt/local/include
        /sw/include
        ENV CPATH)

# The static and shared lookups differ only in the file extension.
if(LibBpf_USE_STATIC_LIBS)
    set(_libbpf_lib_ext a)
else()
    set(_libbpf_lib_ext so)
endif()

set(_libbpf_lib_paths
    /usr/lib
    /usr/local/lib
    /opt/local/lib
    /sw/lib
    ENV LIBRARY_PATH
    ENV LD_LIBRARY_PATH)

find_library(LIBBPF_LIBRARIES
    NAMES libbpf.${_libbpf_lib_ext}
    PATHS ${_libbpf_lib_paths})
find_library(LIBELF_LIBRARIES
    NAMES libelf.${_libbpf_lib_ext}
    PATHS ${_libbpf_lib_paths})
find_library(LIBZ_LIBRARIES
    NAMES libz.${_libbpf_lib_ext}
    PATHS ${_libbpf_lib_paths})

unset(_libbpf_lib_ext)
unset(_libbpf_lib_paths)

include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(LibBpf DEFAULT_MSG
    LIBBPF_LIBRARIES
    LIBELF_LIBRARIES
    LIBZ_LIBRARIES
    LIBBPF_INCLUDE_DIRS)

if(LibBpf_FOUND)
    # libbpf comes first so that static linking resolves its libelf/libz
    # references. De-duplicate in case this module runs more than once in the
    # same scope and the list already contains the dependencies.
    list(APPEND LIBBPF_LIBRARIES ${LIBELF_LIBRARIES} ${LIBZ_LIBRARIES})
    list(REMOVE_DUPLICATES LIBBPF_LIBRARIES)

    # Guard the target creation: add_library() fails if the target already
    # exists from an earlier find_package(LibBpf) call.
    if(NOT TARGET LibBpf::LibBpf)
        add_library(libbpf INTERFACE)
        target_link_libraries(libbpf INTERFACE ${LIBBPF_LIBRARIES})
        target_include_directories(libbpf INTERFACE ${LIBBPF_INCLUDE_DIRS})
        add_library(LibBpf::LibBpf ALIAS libbpf)
    endif()
else()
    # Never expose a partial library list: callers that test LIBBPF_LIBRARIES
    # on its own must see a false value when libelf or libz are missing.
    set(LIBBPF_LIBRARIES "LIBBPF_LIBRARIES-NOTFOUND")
endif()
mark_as_advanced(LIBBPF_INCLUDE_DIRS LIBBPF_LIBRARIES LIBELF_LIBRARIES LIBZ_LIBRARIES)
2 changes: 2 additions & 0 deletions include/lo2s/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ struct Config
bool use_x86_energy = false;
// block I/O
bool use_block_io = false;
// posix I/O
bool use_posix_io = false;
// syscalls
bool use_syscalls = false;
std::vector<int64_t> syscall_filter;
Expand Down
8 changes: 8 additions & 0 deletions include/lo2s/measurement_scope.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ enum class MeasurementScopeType
SYSCALL,
CUDA,
TRACEPOINT,
POSIX_IO,
UNKNOWN
};

Expand Down Expand Up @@ -91,6 +92,11 @@ struct MeasurementScope
return { MeasurementScopeType::TRACEPOINT, s };
}

// Builds the MeasurementScope that pairs MeasurementScopeType::POSIX_IO with
// the given execution scope s.
static MeasurementScope posix_io(ExecutionScope s)
{
return { MeasurementScopeType::POSIX_IO, s };
}

friend bool operator==(const MeasurementScope& lhs, const MeasurementScope& rhs)
{
return (lhs.scope == rhs.scope) && lhs.type == rhs.type;
Expand Down Expand Up @@ -127,6 +133,8 @@ struct MeasurementScope
return fmt::format("cuda kernel events for {}", scope.name());
case MeasurementScopeType::TRACEPOINT:
return fmt::format("tracepoint events for {}", scope.name());
case MeasurementScopeType::POSIX_IO:
return fmt::format("POSIX I/O events for {}", scope.name());
default:
throw new std::runtime_error("Unknown ExecutionScopeType!");
}
Expand Down
Empty file.
Loading

0 comments on commit c670be1

Please sign in to comment.