From d28b61ed56fc3a02792988ec5ffca03e715d9073 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Mon, 24 Feb 2025 14:20:53 +0200 Subject: [PATCH] detect netdata exit reasons (#19617) * detect netdata exit reasons * log exit initiated * commented debug logs * commented debug logs again * fix windows system shutdown detection * commented debug logs again * added exit reason msgid * test shutdown detection by writing to exit.reason * implement status file loading/saving * accept also the shutdown event * fix windows logs * run as service from the script - not working yet * save the first fatal message into the status file * save memory information in the status file * load machine guid early enough * fix loading sequence * simplify function run once logic; add dependencies on netdata.conf loading when required * accept service parameter * build for packaging is required for services * log last exit status with a proper message; log node id and claim id in the status file * added /var/cache disk space; fixed bug in rfc3339 parsing * change log priority based on condition * SIGINT is normal exit under windows * wait to wevt provider to be initialized before logging * Revert "fix windows logs (#19632)" This reverts commit d8c3dc087c7285400b229f972d081e1df340fbf2. 
* fix windows logs - the right way * set default event log sizes * added detection of netdata update * added systemd dbus watcher for systemd shutdown/suspend events * log system shutdown * detect system reboot in a better way * cleanup static thread * on fatal, call _exit(); libunwind should not skip top calls on the stack * make the sd bus watcher exit on netdata shutdown * make the netdata agent version log also print the last exit status * start watcher when shutdown is initiated; prevent double logging of shutdown initiation * prepare for sending reports * a single read per receiver * track memory calls per worker * use 4 malloc arenas on parents * spread higher tiers flushing over time * pgc and replication tuning * on child disconnect, get retention from the rrdcontexts worker * BUFFER: the default size is now 1024 bytes * use dedicated jemalloc arena for judy allocations * ARAL: do not double the page size unconditionally; cleanup old members * double pgc partitions * fix compiler warning * make the default replication commit buffer big enough to avoid constant realloc * post crash reports * revert log2journal changes * log2journal minor * disable the crash report when there was no status file * increase buffer sizes * added os_boottime() and os_boot_id(), which are now used in the status file * log2journal: convert \u000A to \n * fix headers includes * fix compilation on non-linux * for host prefix when getting boot_id and boottime * write status file to /run/netdata too * fix /run/netdata on startup * move the IPC pipe inside the run directory * exclusive file lock to avoid running concurrently * allow netdatacli to run from any user and still find the run dir of netdata * fix pipe failure message * fix nested loop sharing same variable in ADCS * fix run_dir and netdatacli on windows * fix status files on windows * initialize nd_threads early enough to allow creating threads during initialization * fix compiler warnings * on shutdown ignore points with 
delayed flushing * fix macos compilation * added os_type to daemon status * make daemon status schema ecs compatible * save daemon status file on every signal * fix external plugins log to journal * use special allocators for judy, only on netdata - not the external plugins * systemd-cat-native: default newline string is \n * when generating json, prefer special 2 character sequences for common control characters * fix daemon-status filenames * log errors when the status file cannot be opened/saved/parsed * make status file world readable * do not write status file in /run/netdata; add fall back locations when the file cannot be saved in the cache dir * move ram and disk into host * simplified inline subobject parsing for jsonc * ensure path is an array of at least 128 bytes * fix non-linux compilation --- CMakeLists.txt | 73 ++ packaging/cmake/config.cmake.h.in | 3 + packaging/utils/compile-and-run-windows.sh | 59 +- src/aclk/aclk.c | 14 +- src/claim/cloud-conf.c | 2 + src/cli/cli.c | 2 +- .../cgroups.plugin/cgroup-name.sh.in | 4 +- .../cgroup-network-helper.sh.in | 4 +- .../charts.d.plugin/charts.d.plugin.in | 4 +- src/collectors/cups.plugin/cups_plugin.c | 6 +- .../freebsd.plugin/plugin_freebsd.c | 2 +- src/collectors/ioping.plugin/ioping.plugin.in | 4 +- src/collectors/log2journal/log2journal-json.c | 136 ++-- src/collectors/nfacct.plugin/plugin_nfacct.c | 2 +- src/collectors/perf.plugin/perf_plugin.c | 2 +- .../systemd-journal-annotations.c | 1 + .../systemd-journal-files.c | 12 +- .../systemd-journal-watcher.c | 8 +- src/collectors/tc.plugin/tc-qos-helper.sh.in | 4 +- src/collectors/windows.plugin/perflib-adcs.c | 7 +- .../xenstat.plugin/xenstat_plugin.c | 2 +- src/daemon/analytics.c | 156 +++-- src/daemon/analytics.h | 1 + src/daemon/buildinfo.c | 18 +- src/daemon/commands.c | 2 +- src/daemon/common.h | 2 +- .../netdata-conf-backwards-compatibility.c | 2 + src/daemon/config/netdata-conf-db.c | 10 +- src/daemon/config/netdata-conf-directories.c | 4 +- 
src/daemon/config/netdata-conf-global.c | 6 +- src/daemon/config/netdata-conf-logs.c | 6 +- src/daemon/config/netdata-conf-profile.c | 9 +- src/daemon/config/netdata-conf-ssl.c | 4 +- src/daemon/config/netdata-conf-web.c | 12 +- src/daemon/config/netdata-conf.c | 5 +- src/daemon/daemon-service.c | 2 +- src/daemon/daemon-service.h | 3 +- src/daemon/daemon-shutdown.c | 56 +- src/daemon/daemon-status-file.c | 652 ++++++++++++++++++ src/daemon/daemon-status-file.h | 75 ++ src/daemon/daemon-systemd-watcher.c | 154 +++++ src/daemon/daemon-systemd-watcher.h | 8 + src/daemon/daemon.c | 3 +- src/daemon/daemon.h | 2 - src/daemon/main.c | 77 ++- src/daemon/pipename.c | 25 +- src/daemon/pulse/pulse-daemon-memory-system.c | 2 +- src/daemon/pulse/pulse-daemon-memory.c | 2 +- src/daemon/pulse/pulse-workers.c | 92 +++ src/daemon/signals.c | 29 +- src/daemon/static_threads.c | 11 + src/daemon/winsvc.cc | 22 +- src/database/contexts/rrdcontext.c | 2 +- src/database/contexts/worker.c | 5 + src/database/engine/cache.c | 14 +- src/database/engine/datafile.c | 8 +- src/database/engine/journalfile.c | 12 +- src/database/engine/page.c | 8 +- src/database/engine/pdc.c | 8 +- src/database/engine/rrdengine.c | 8 +- src/database/rrddim-collection.c | 83 ++- src/database/rrddim-collection.h | 2 + src/database/rrddim.c | 13 +- src/database/rrdhost.h | 1 + src/database/rrdset-index-id.c | 20 +- src/database/rrdset-index-id.h | 2 + src/database/rrdset.h | 3 +- src/database/storage-engine.h | 6 +- src/health/health_dyncfg.c | 16 +- src/health/notifications/alarm-notify.sh.in | 4 +- src/health/rrdcalc.c | 2 +- src/libnetdata/aral/aral.c | 40 +- src/libnetdata/buffer/buffer.c | 9 +- src/libnetdata/buffer/buffer.h | 217 ++++-- src/libnetdata/common.h | 5 + src/libnetdata/datetime/rfc3339.c | 81 ++- src/libnetdata/exit/exit_initiated.c | 112 +++ src/libnetdata/exit/exit_initiated.h | 54 ++ src/libnetdata/facets/logs_query_status.h | 3 +- src/libnetdata/json/json-c-parser-inline.c | 36 +- 
src/libnetdata/json/json-c-parser-inline.h | 67 +- src/libnetdata/libjudy/judy-malloc.c | 97 ++- src/libnetdata/libjudy/judy-malloc.h | 2 + src/libnetdata/libnetdata.c | 2 - src/libnetdata/libnetdata.h | 7 +- src/libnetdata/log/nd_log-field-formatters.c | 76 +- src/libnetdata/log/nd_log-init.c | 11 +- src/libnetdata/log/nd_log-internals.h | 4 + src/libnetdata/log/nd_log-libunwind.c | 11 +- src/libnetdata/log/nd_log-to-windows-events.c | 40 +- src/libnetdata/log/nd_log.c | 40 +- src/libnetdata/log/nd_log.h | 4 + src/libnetdata/log/nd_wevents_manifest.xml | 295 -------- src/libnetdata/log/systemd-cat-native.c | 6 +- src/libnetdata/log/systemd-cat-native.md | 25 +- src/libnetdata/log/systemd-journal-helpers.c | 1 + src/libnetdata/log/systemd-journal-helpers.h | 2 +- src/libnetdata/log/wevt_netdata_install.bat | 7 + src/libnetdata/memory/nd-mallocz.c | 20 +- src/libnetdata/memory/nd-mallocz.h | 4 +- src/libnetdata/memory/nd-mmap.c | 5 +- src/libnetdata/os/boot_id.c | 78 +++ src/libnetdata/os/boot_id.h | 25 + src/libnetdata/os/boottime.c | 125 ++++ src/libnetdata/os/boottime.h | 19 + src/libnetdata/os/disk_space.c | 92 +++ src/libnetdata/os/disk_space.h | 21 + src/libnetdata/os/file_lock.c | 112 +++ src/libnetdata/os/file_lock.h | 49 ++ src/libnetdata/os/file_metadata.c | 72 ++ src/libnetdata/os/file_metadata.h | 19 + src/libnetdata/os/os.h | 7 + src/libnetdata/os/process_path.c | 90 +++ src/libnetdata/os/process_path.h | 13 + src/libnetdata/os/run_dir.c | 128 ++++ src/libnetdata/os/run_dir.h | 17 + src/libnetdata/os/system_memory.c | 109 ++- src/libnetdata/os/system_memory.h | 3 + src/libnetdata/required_dummies.h | 4 +- src/libnetdata/uuid/uuid.h | 1 + .../worker_utilization/worker_utilization.c | 49 +- .../worker_utilization/worker_utilization.h | 21 + src/ml/ml_memory.cc | 6 + src/registry/registry.h | 3 +- src/registry/registry_init.c | 25 +- .../protocol/command-begin-set-end-init.c | 2 +- src/streaming/stream-circular-buffer.h | 3 +- src/streaming/stream-conf.c | 
4 +- src/streaming/stream-receiver.c | 9 +- src/streaming/stream-replication-sender.c | 4 +- src/streaming/stream-sender-commit.c | 12 +- src/streaming/stream-sender-commit.h | 4 +- src/streaming/stream-sender.c | 6 +- .../api/functions/function-bearer_get_token.c | 3 +- src/web/server/web_client.c | 4 +- src/web/server/web_client.h | 2 +- 136 files changed, 3510 insertions(+), 934 deletions(-) create mode 100644 src/daemon/daemon-status-file.c create mode 100644 src/daemon/daemon-status-file.h create mode 100644 src/daemon/daemon-systemd-watcher.c create mode 100644 src/daemon/daemon-systemd-watcher.h create mode 100644 src/libnetdata/exit/exit_initiated.c create mode 100644 src/libnetdata/exit/exit_initiated.h delete mode 100644 src/libnetdata/log/nd_wevents_manifest.xml create mode 100644 src/libnetdata/os/boot_id.c create mode 100644 src/libnetdata/os/boot_id.h create mode 100644 src/libnetdata/os/boottime.c create mode 100644 src/libnetdata/os/boottime.h create mode 100644 src/libnetdata/os/disk_space.c create mode 100644 src/libnetdata/os/disk_space.h create mode 100644 src/libnetdata/os/file_lock.c create mode 100644 src/libnetdata/os/file_lock.h create mode 100644 src/libnetdata/os/file_metadata.c create mode 100644 src/libnetdata/os/file_metadata.h create mode 100644 src/libnetdata/os/process_path.c create mode 100644 src/libnetdata/os/process_path.h create mode 100644 src/libnetdata/os/run_dir.c create mode 100644 src/libnetdata/os/run_dir.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 22a097861d8d25..c03f6ac9d0a80b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -268,6 +268,41 @@ if(ENABLE_MIMALLOC) netdata_add_mimalloc() endif() +option(ENABLE_JEMALLOC "Disable jemalloc allocator" OFF) + +if(ENABLE_JEMALLOC) + pkg_check_modules(JEMALLOC QUIET jemalloc) + if(JEMALLOC_FOUND) + # Check if jemalloc has arena API + set(CMAKE_REQUIRED_INCLUDES ${JEMALLOC_INCLUDE_DIRS}) + set(CMAKE_REQUIRED_LIBRARIES ${JEMALLOC_LIBRARIES}) + check_c_source_compiles(" 
+ #include + int main() { + unsigned narenas; + size_t sz = sizeof(narenas); + mallctl(\"arenas.narenas\", &narenas, &sz, NULL, 0); + return 0; + } + " HAVE_JEMALLOC_ARENA_API) + + if(HAVE_JEMALLOC_ARENA_API) + set(ENABLE_JEMALLOC ON CACHE BOOL "Enable jemalloc allocator" FORCE) + message(STATUS "Jemalloc found with arena API support - enabling") + else() + if(ENABLE_JEMALLOC) + message(FATAL_ERROR "Jemalloc was found but does not have arena API support") + endif() + message(STATUS "Jemalloc found but does not have arena API support - disabling") + endif() + else() + if(ENABLE_JEMALLOC) + message(FATAL_ERROR "Jemalloc support was explicitly enabled but jemalloc was not found") + endif() + message(STATUS "Jemalloc not found - disabling") + endif() +endif() + if(ENABLE_PLUGIN_GO) include(NetdataGoTools) @@ -449,6 +484,8 @@ check_function_exists(arc4random_uniform HAVE_ARC4RANDOM_UNIFORM) check_function_exists(getrandom HAVE_GETRANDOM) check_function_exists(sysinfo HAVE_SYSINFO) +check_function_exists(timegm HAVE_TIMEGM) + # # check source compilation # @@ -456,6 +493,15 @@ check_function_exists(sysinfo HAVE_SYSINFO) include(CheckCSourceCompiles) include(CheckCXXSourceCompiles) +check_c_source_compiles(" +#include +int main(void) { + struct tm t; + (void)t.tm_gmtoff; + return 0; +} +" HAVE_TM_GMTOFF) + set(CMAKE_REQUIRED_LIBRARIES pthread) check_c_source_compiles(" #define _GNU_SOURCE @@ -994,6 +1040,22 @@ set(LIBNETDATA_FILES src/libnetdata/os/get_system_pagesize.h src/libnetdata/os/hostname.c src/libnetdata/os/hostname.h + src/libnetdata/exit/exit_initiated.c + src/libnetdata/exit/exit_initiated.h + src/libnetdata/os/disk_space.c + src/libnetdata/os/disk_space.h + src/libnetdata/os/file_metadata.c + src/libnetdata/os/file_metadata.h + src/libnetdata/os/process_path.c + src/libnetdata/os/process_path.h + src/libnetdata/os/boottime.c + src/libnetdata/os/boottime.h + src/libnetdata/os/boot_id.c + src/libnetdata/os/boot_id.h + src/libnetdata/os/run_dir.c + 
src/libnetdata/os/run_dir.h + src/libnetdata/os/file_lock.c + src/libnetdata/os/file_lock.h ) list(APPEND LIBNETDATA_FILES ${INICFG_FILES}) @@ -1182,8 +1244,12 @@ set(DAEMON_FILES src/daemon/pulse/pulse-db-dbengine-retention.h src/daemon/pulse/pulse-parents.c src/daemon/pulse/pulse-parents.h + src/daemon/daemon-status-file.c + src/daemon/daemon-status-file.h src/daemon/config/netdata-conf-ssl.c src/daemon/config/netdata-conf-ssl.h + src/daemon/daemon-systemd-watcher.c + src/daemon/daemon-systemd-watcher.h ) set(H2O_FILES @@ -2170,6 +2236,13 @@ netdata_add_jsonc_to_target(libnetdata) netdata_add_libyaml_to_target(libnetdata) +# jemalloc +if(ENABLE_JEMALLOC) + target_link_libraries(libnetdata PUBLIC ${JEMALLOC_LIBRARIES}) + target_include_directories(libnetdata PUBLIC ${JEMALLOC_INCLUDE_DIRS}) + target_compile_options(libnetdata PUBLIC ${JEMALLOC_CFLAGS_OTHER}) +endif() + # libunwind if(ENABLE_LIBUNWIND) pkg_check_modules(LIBUNWIND libunwind IMPORTED_TARGET) diff --git a/packaging/cmake/config.cmake.h.in b/packaging/cmake/config.cmake.h.in index 6dd05a828e2b9b..fe159910a4e64e 100644 --- a/packaging/cmake/config.cmake.h.in +++ b/packaging/cmake/config.cmake.h.in @@ -79,6 +79,8 @@ #cmakedefine HAVE_RAND_S #cmakedefine HAVE_GETRANDOM #cmakedefine HAVE_SYSINFO +#cmakedefine HAVE_TIMEGM +#cmakedefine HAVE_TM_GMTOFF #cmakedefine HAVE_LIBUNWIND #cmakedefine HAVE_BACKTRACE @@ -116,6 +118,7 @@ #cmakedefine HAVE_FUNC_ATTRIBUTE_NORETURN #cmakedefine HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL #cmakedefine HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT +#cmakedefine HAVE_JEMALLOC_ARENA_API // enabled features diff --git a/packaging/utils/compile-and-run-windows.sh b/packaging/utils/compile-and-run-windows.sh index 2d540eee94e78e..9944e9ddfb2ab6 100644 --- a/packaging/utils/compile-and-run-windows.sh +++ b/packaging/utils/compile-and-run-windows.sh @@ -1,4 +1,6 @@ -#!/bin/sh +#!/bin/bash + +RUN_AS_SERVICE=0 # On MSYS2, install these dependencies to build netdata: install_dependencies() { @@ 
-21,13 +23,21 @@ install_dependencies() { msys/libcurl msys/libcurl-devel } +BUILD_FOR_PACKAGING="Off" + if [ "${1}" = "install" ] then install_dependencies || exit 1 exit 0 fi -BUILD_FOR_PACKAGING="Off" +if [ "${1}" = "service" ] +then + RUN_AS_SERVICE=1 + BUILD_FOR_PACKAGING="On" + shift +fi + if [ "${1}" = "package" ] then BUILD_FOR_PACKAGING="On" @@ -70,12 +80,18 @@ then ${NULL} fi +echo "Compiling Netdata..." ninja -v -C "${build}" || ninja -v -C "${build}" -j 1 -echo "Stopping service Netdata" -sc stop "Netdata" || echo "Failed" +echo "Stopping service Netdata..." +sc stop "Netdata" || echo "stop Failed, ok" + +if [ $RUN_AS_SERVICE -eq 1 ]; then + sc delete "Netdata" || echo "delete Failed, ok" +fi -ninja -v -C "${build}" install || ninja -v -C "${build}" -j 1 +rm -f /opt/netdata/usr/bin/*.dll || echo "deleting old .dll files failed, ok" +ninja -v -C "${build}" install # register the event log publisher cmd.exe //c "$(cygpath -w -a "/opt/netdata/usr/bin/wevt_netdata_install.bat")" @@ -84,9 +100,32 @@ cmd.exe //c "$(cygpath -w -a "/opt/netdata/usr/bin/wevt_netdata_install.bat")" #echo "Compile with:" #echo "ninja -v -C \"${build}\" install || ninja -v -C \"${build}\" -j 1" -echo "starting netdata..." -# enable JIT debug with gdb -export MSYS="error_start:$(cygpath -w /usr/bin/gdb)" +if [ $RUN_AS_SERVICE -eq 1 ]; then + echo + echo "Copying library files to /opt/netdata/usr/bin ..." + ldd /opt/netdata/usr/bin/netdata |\ + grep " => /usr/bin/" |\ + sed -e 's|\s\+| |g' -e 's|^ ||g' |\ + cut -d ' ' -f 3 |\ + while read x; do + cp $x /opt/netdata/usr/bin/ + done -rm -rf /opt/netdata/var/log/netdata/*.log || echo -/opt/netdata/usr/bin/netdata -D + echo + echo "Registering Netdata service..." + sc create "Netdata" binPath= "$(cygpath.exe -w /opt/netdata/usr/bin/netdata.exe)" start= auto + + echo "Starting Netdata service..." + sc start "Netdata" + +else + + echo "Starting netdata..." 
+ + # enable JIT debug with gdb + export MSYS="error_start:$(cygpath -w /usr/bin/gdb)" + + rm -rf /opt/netdata/var/log/netdata/*.log || echo + /opt/netdata/usr/bin/netdata -D + +fi diff --git a/src/aclk/aclk.c b/src/aclk/aclk.c index d84cebb0b4a025..4baf3b153741ca 100644 --- a/src/aclk/aclk.c +++ b/src/aclk/aclk.c @@ -199,7 +199,7 @@ static int wait_till_agent_claimed(void) * @param aclk_hostname points to location where string pointer to hostname will be set * @param aclk_port port to int where port will be saved * - * @return If non 0 returned irrecoverable error happened (or netdata_exit) and ACLK should be terminated + * @return If non 0 returned irrecoverable error happened (or exit_initiated) and ACLK should be terminated */ static int wait_till_agent_claim_ready() { @@ -306,7 +306,7 @@ static int handle_connection(mqtt_wss_client client) { while (service_running(SERVICE_ACLK)) { // timeout 1000 to check at least once a second - // for netdata_exit + // for exit_initiated int rc = mqtt_wss_service(client, 1000); if (rc < 0){ worker_is_busy(WORKER_ACLK_DISCONNECTED); @@ -452,9 +452,9 @@ static unsigned long aclk_reconnect_delay() { return aclk_tbeb_delay(0, aclk_env->backoff.base, aclk_env->backoff.min_s, aclk_env->backoff.max_s); } -/* Block till aclk_reconnect_delay is satisfied or netdata_exit is signalled +/* Block till aclk_reconnect_delay is satisfied or exit_initiated is signalled * @return 0 - Go ahead and connect (delay expired) - * 1 - netdata_exit + * 1 - exit_initiated */ #define NETDATA_EXIT_POLL_MS (MSEC_PER_SEC/4) static int aclk_block_till_recon_allowed() { @@ -466,7 +466,7 @@ static int aclk_block_till_recon_allowed() { nd_log(NDLS_DAEMON, NDLP_DEBUG, "Wait before attempting to reconnect in %.3f seconds", recon_delay / (float)MSEC_PER_SEC); - // we want to wake up from time to time to check netdata_exit + // we want to wake up from time to time to check exit_initiated worker_is_busy(WORKER_ACLK_WAITING_TO_CONNECT); while (recon_delay) { @@ 
-602,7 +602,7 @@ const char *aclk_cloud_base_url = NULL; * @param client instance of mqtt_wss_client * @return 0 - Successful Connection, * <0 - Irrecoverable Error -> Kill ACLK, - * >0 - netdata_exit + * >0 - exit_initiated */ #define CLOUD_BASE_URL_READ_RETRY 30 #ifdef ACLK_SSL_ALLOW_SELF_SIGNED @@ -865,7 +865,7 @@ void *aclk_main(void *ptr) mqtt_wss_set_max_buf_size(mqttwss_client, 25*1024*1024); // Keep reconnecting and talking until our time has come - // and the Grim Reaper (netdata_exit) calls + // and the Grim Reaper (exit_initiated) calls netdata_log_info("ACLK: Starting ACLK query event loop"); aclk_query_init(mqttwss_client); do { diff --git a/src/claim/cloud-conf.c b/src/claim/cloud-conf.c index cf6c867d29c0f5..134ccb32098049 100644 --- a/src/claim/cloud-conf.c +++ b/src/claim/cloud-conf.c @@ -54,6 +54,8 @@ static void cloud_conf_load_defaults(void) { } void cloud_conf_load(int silent) { + netdata_conf_section_directories(); + errno_clear(); char *filename = filename_from_path_entry_strdupz(netdata_configured_cloud_dir, "cloud.conf"); int ret = inicfg_load(&cloud_config, filename, 1, NULL); diff --git a/src/cli/cli.c b/src/cli/cli.c index 2a6e570e46e4c9..ea7cff735570f3 100644 --- a/src/cli/cli.c +++ b/src/cli/cli.c @@ -133,7 +133,7 @@ static void connect_cb(uv_connect_t* req, int status) (void)req; if (status) { fprintf(stderr, "uv_pipe_connect(): %s\n", uv_strerror(status)); - fprintf(stderr, "Make sure the netdata service is running.\n"); + fprintf(stderr, "Cannot connect to '%s'.\nMake sure the netdata service is running.\n", daemon_pipename()); exit(-1); } if (0 == command_string_size) { diff --git a/src/collectors/cgroups.plugin/cgroup-name.sh.in b/src/collectors/cgroups.plugin/cgroup-name.sh.in index 2c7ad5423e95fc..e02650f6bf1764 100755 --- a/src/collectors/cgroups.plugin/cgroup-name.sh.in +++ b/src/collectors/cgroups.plugin/cgroup-name.sh.in @@ -72,14 +72,14 @@ log() { [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return - 
systemd-cat-native --log-as-netdata --newline="--NEWLINE--" < 3) { + *(*d)++ = '\\'; + *(*d)++ = 'n'; + (*remaining) -= 2; + } +} + +static inline void copy_tab(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) { + if(*remaining > 3) { + *(*d)++ = '\\'; + *(*d)++ = 't'; + (*remaining) -= 2; + } +} + static inline bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) { if (codepoint <= 0x7F) { // 1-byte sequence if (*remaining < 2) return false; // +1 for the null - *(*d)++ = (char)codepoint; + if(codepoint == '\n') + copy_newline(NULL, d, remaining); + else if(codepoint == '\t') + copy_tab(NULL, d, remaining); + else + *(*d)++ = (char)codepoint; (*remaining)--; } else if (codepoint <= 0x7FF) { @@ -255,22 +276,6 @@ size_t parse_surrogate(const char *s, char *d, size_t *remaining) { } } -static inline void copy_newline(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) { - if(*remaining > 3) { - *(*d)++ = '\\'; - *(*d)++ = 'n'; - (*remaining) -= 2; - } -} - -static inline void copy_tab(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) { - if(*remaining > 3) { - *(*d)++ = '\\'; - *(*d)++ = 't'; - (*remaining) -= 2; - } -} - static inline bool json_parse_string(LOG_JSON_STATE *js) { static __thread char value[JOURNAL_MAX_VALUE_LEN]; @@ -511,35 +516,33 @@ static inline bool json_parse_array(LOG_JSON_STATE *js) { json_consume_char(js); - size_t index = 0; - do { - const char *s = json_current_pos(js); - if(*s == ']') { - json_consume_char(js); - break; - } - - if(!json_key_index_and_push(js, index)) - return false; + const char *s = json_current_pos(js); + if(*s == ']') + json_consume_char(js); + else { + size_t index = 0; + do { + if (!json_key_index_and_push(js, index)) + return false; - if(!json_parse_value(js)) - return false; + if (!json_parse_value(js)) + return false; - json_key_pop(js); + json_key_pop(js); - if(!json_expect_char_after_white_space(js, ",]")) - return false; + if 
(!json_expect_char_after_white_space(js, ",]")) + return false; - s = json_current_pos(js); - json_consume_char(js); - if(*s == ',') { - index++; - continue; - } - else // } - break; + s = json_current_pos(js); + json_consume_char(js); + if (*s == ',') { + index++; + continue; + } else // ']' + break; - } while(true); + } while (true); + } return true; } @@ -550,40 +553,39 @@ static inline bool json_parse_object(LOG_JSON_STATE *js) { json_consume_char(js); - do { - const char *s = json_current_pos(js); - if(*s == '}') { - json_consume_char(js); - break; - } - - if (!json_expect_char_after_white_space(js, "\"")) - return false; + const char *s = json_current_pos(js); + if(*s == '}') + json_consume_char(js); + else { + do { + if (!json_expect_char_after_white_space(js, "\"")) + return false; - if(!json_parse_key_and_push(js)) - return false; + if (!json_parse_key_and_push(js)) + return false; - if(!json_expect_char_after_white_space(js, ":")) - return false; + if (!json_expect_char_after_white_space(js, ":")) + return false; - json_consume_char(js); + json_consume_char(js); - if(!json_parse_value(js)) - return false; + if (!json_parse_value(js)) + return false; - json_key_pop(js); + json_key_pop(js); - if(!json_expect_char_after_white_space(js, ",}")) - return false; + if (!json_expect_char_after_white_space(js, ",}")) + return false; - s = json_current_pos(js); - json_consume_char(js); - if(*s == ',') - continue; - else // } - break; + s = json_current_pos(js); + json_consume_char(js); + if (*s == ',') + continue; + else // '}' + break; - } while(true); + } while (true); + } return true; } diff --git a/src/collectors/nfacct.plugin/plugin_nfacct.c b/src/collectors/nfacct.plugin/plugin_nfacct.c index edf17bfac6b7d7..18544abac5e495 100644 --- a/src/collectors/nfacct.plugin/plugin_nfacct.c +++ b/src/collectors/nfacct.plugin/plugin_nfacct.c @@ -837,7 +837,7 @@ int main(int argc, char **argv) { for(iteration = 0; 1; iteration++) { usec_t dt = heartbeat_next(&hb); - 
if(unlikely(netdata_exit)) break; + if(unlikely(exit_initiated)) break; if(debug && iteration) fprintf(stderr, "nfacct.plugin: iteration %zu, dt %"PRIu64" usec\n" diff --git a/src/collectors/perf.plugin/perf_plugin.c b/src/collectors/perf.plugin/perf_plugin.c index aea14bf2b7cd75..f69f9ab1791179 100644 --- a/src/collectors/perf.plugin/perf_plugin.c +++ b/src/collectors/perf.plugin/perf_plugin.c @@ -1325,7 +1325,7 @@ int main(int argc, char **argv) { for(iteration = 0; 1; iteration++) { usec_t dt = heartbeat_next(&hb); - if (unlikely(netdata_exit)) + if (unlikely(exit_initiated)) break; if (unlikely(debug && iteration)) diff --git a/src/collectors/systemd-journal.plugin/systemd-journal-annotations.c b/src/collectors/systemd-journal.plugin/systemd-journal-annotations.c index 4ce948f8496f92..fe8cf2457fd8c9 100644 --- a/src/collectors/systemd-journal.plugin/systemd-journal-annotations.c +++ b/src/collectors/systemd-journal.plugin/systemd-journal-annotations.c @@ -619,6 +619,7 @@ static void netdata_systemd_journal_message_ids_init(void) { msgid_into_dict("ec87a56120d5431bace51e2fb8bba243", "Netdata log flood protection"); msgid_into_dict("acb33cb95778476baac702eb7e4e151d", "Netdata Cloud connection"); msgid_into_dict("d1f59606dd4d41e3b217a0cfcae8e632", "Netdata extreme cardinality"); + msgid_into_dict("02f47d350af5449197bf7a95b605a468", "Netdata exit reason"); msgid_into_dict("4fdf40816c124623a032b7fe73beacb8", "Netdata dynamic configuration"); } diff --git a/src/collectors/systemd-journal.plugin/systemd-journal-files.c b/src/collectors/systemd-journal.plugin/systemd-journal-files.c index 6bd0fca59cf77b..b41675c5d1cedf 100644 --- a/src/collectors/systemd-journal.plugin/systemd-journal-files.c +++ b/src/collectors/systemd-journal.plugin/systemd-journal-files.c @@ -344,9 +344,9 @@ void journal_file_update_header(const char *filename, struct journal_file *jf) { jf->last_scan_header_vs_last_modified_ut = jf->file_last_modified_ut; - nd_log(NDLS_COLLECTORS, NDLP_DEBUG, - 
"Journal file header updated '%s'", - jf->filename); +// nd_log(NDLS_COLLECTORS, NDLP_DEBUG, +// "Journal file header updated '%s'", +// jf->filename); } static STRING *string_strdupz_source(const char *s, const char *e, size_t max_len, const char *prefix) { @@ -459,9 +459,9 @@ static bool files_registry_conflict_cb(const DICTIONARY_ITEM *item __maybe_unuse jf->msg_last_ut = jf->file_last_modified_ut; - nd_log(NDLS_COLLECTORS, NDLP_DEBUG, - "Journal file updated to the journal files registry '%s'", - jf->filename); +// nd_log(NDLS_COLLECTORS, NDLP_DEBUG, +// "Journal file updated to the journal files registry '%s'", +// jf->filename); } return false; diff --git a/src/collectors/systemd-journal.plugin/systemd-journal-watcher.c b/src/collectors/systemd-journal.plugin/systemd-journal-watcher.c index ea71b4fa3833c6..8813f559f732ad 100644 --- a/src/collectors/systemd-journal.plugin/systemd-journal-watcher.c +++ b/src/collectors/systemd-journal.plugin/systemd-journal-watcher.c @@ -296,7 +296,7 @@ void process_event(Watcher *watcher, int inotifyFd, struct inotify_event *event) return; } -#ifdef NETDATA_INTERNAL_CHECKS +#if 0 { CLEAN_BUFFER *wb = buffer_create(0, NULL); INOTIFY_MASK_2buffer(wb, event->mask, ", "); @@ -435,9 +435,9 @@ static void process_pending(Watcher *watcher) { dictionary_del(journal_files_registry, fullPath); } else if(S_ISREG(info.st_mode)) { - nd_log(NDLS_COLLECTORS, NDLP_DEBUG, - "JOURNAL WATCHER: file '%s' has been added/updated, updating the registry", - fullPath); +// nd_log(NDLS_COLLECTORS, NDLP_DEBUG, +// "JOURNAL WATCHER: file '%s' has been added/updated, updating the registry", +// fullPath); struct journal_file t = { .file_last_modified_ut = info.st_mtim.tv_sec * USEC_PER_SEC + diff --git a/src/collectors/tc.plugin/tc-qos-helper.sh.in b/src/collectors/tc.plugin/tc-qos-helper.sh.in index 572625b21144d4..0ba9f95571e121 100755 --- a/src/collectors/tc.plugin/tc-qos-helper.sh.in +++ b/src/collectors/tc.plugin/tc-qos-helper.sh.in @@ -73,14 +73,14 
@@ log() { [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return - systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <action) +void analytics_statistic_send(const analytics_statistic_t *statistic) { + if (!statistic || !statistic->action || !analytics_check_enabled() || !analytics_script_exists()) return; const char *action_result = statistic->result; @@ -825,16 +882,17 @@ void analytics_statistic_send(const analytics_statistic_t *statistic) { if (!statistic->result) action_result = ""; + if (!statistic->data) action_data = ""; char *command_to_run = mallocz( - sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + + sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + FILENAME_MAX + analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15)); sprintf( command_to_run, - "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", - as_script, + "%s/anonymous-statistics.sh '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", + netdata_configured_primary_plugins_dir, statistic->action, action_result, action_data, @@ -880,8 +938,8 @@ void analytics_statistic_send(const analytics_statistic_t *statistic) { analytics_data.netdata_fail_reason); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "%s '%s' '%s' '%s'", - as_script, statistic->action, action_result, action_data); + "%s/anonymous-statistics.sh '%s' '%s' '%s'", + netdata_configured_primary_plugins_dir, statistic->action, action_result, action_data); POPEN_INSTANCE *instance = spawn_popen_run(command_to_run); if (instance) { @@ -902,8 +960,8 @@ void analytics_statistic_send(const 
analytics_statistic_t *statistic) { } else nd_log(NDLS_DAEMON, NDLP_NOTICE, - "Failed to run statistics script: %s.", - as_script); + "Failed to run statistics script: %s/anonymous-statistics.sh", + netdata_configured_primary_plugins_dir); freez(command_to_run); } diff --git a/src/daemon/analytics.h b/src/daemon/analytics.h index 43e1ce4632e542..63cbde5318c393 100644 --- a/src/daemon/analytics.h +++ b/src/daemon/analytics.h @@ -96,6 +96,7 @@ typedef struct { } analytics_statistic_t; void analytics_statistic_send(const analytics_statistic_t *statistic); +bool analytics_check_enabled(void); extern struct analytics_data analytics_data; diff --git a/src/daemon/buildinfo.c b/src/daemon/buildinfo.c index b3c26454997789..9f5280e67ba837 100644 --- a/src/daemon/buildinfo.c +++ b/src/daemon/buildinfo.c @@ -1512,14 +1512,16 @@ static void populate_packaging_info() { } OS_SYSTEM_MEMORY sm = os_system_memory(true); - char buf[1024]; - snprintfz(buf, sizeof(buf), "%" PRIu64, sm.ram_total_bytes); - // size_snprintf(buf, sizeof(buf), sm.ram_total_bytes, "B", false); - build_info_set_value_strdupz(BIB_RUNTIME_MEM_TOTAL, buf); - - snprintfz(buf, sizeof(buf), "%" PRIu64, sm.ram_available_bytes); - // size_snprintf(buf, sizeof(buf), sm.ram_available_bytes, "B", false); - build_info_set_value_strdupz(BIB_RUNTIME_MEM_AVAIL, buf); + if(OS_SYSTEM_MEMORY_OK(sm)) { + char buf[1024]; + snprintfz(buf, sizeof(buf), "%" PRIu64, sm.ram_total_bytes); + // size_snprintf(buf, sizeof(buf), sm.ram_total_bytes, "B", false); + build_info_set_value_strdupz(BIB_RUNTIME_MEM_TOTAL, buf); + + snprintfz(buf, sizeof(buf), "%" PRIu64, sm.ram_available_bytes); + // size_snprintf(buf, sizeof(buf), sm.ram_available_bytes, "B", false); + build_info_set_value_strdupz(BIB_RUNTIME_MEM_AVAIL, buf); + } } // ---------------------------------------------------------------------------- diff --git a/src/daemon/commands.c b/src/daemon/commands.c index ad2686429ef9d6..084536dee73b77 100644 --- a/src/daemon/commands.c +++ 
b/src/daemon/commands.c @@ -164,7 +164,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message) nd_log_limits_unlimited(); netdata_log_info("COMMAND: Cleaning up to exit."); - netdata_cleanup_and_exit(0, NULL, NULL, NULL); + netdata_cleanup_and_exit(EXIT_REASON_CMD_EXIT, NULL, NULL, NULL); exit(0); return CMD_STATUS_SUCCESS; diff --git a/src/daemon/common.h b/src/daemon/common.h index cc2982c04b959d..3138c2add3d9be 100644 --- a/src/daemon/common.h +++ b/src/daemon/common.h @@ -90,7 +90,7 @@ extern const char *netdata_configured_host_prefix; extern const char *netdata_configured_timezone; extern const char *netdata_configured_abbrev_timezone; extern int32_t netdata_configured_utc_offset; -extern int netdata_anonymous_statistics_enabled; +extern bool netdata_anonymous_statistics_enabled; extern bool netdata_ready; extern time_t netdata_start_time; diff --git a/src/daemon/config/netdata-conf-backwards-compatibility.c b/src/daemon/config/netdata-conf-backwards-compatibility.c index 75b5847fe77edd..e774dee24526c5 100644 --- a/src/daemon/config/netdata-conf-backwards-compatibility.c +++ b/src/daemon/config/netdata-conf-backwards-compatibility.c @@ -4,6 +4,8 @@ #include "database/engine/rrdengineapi.h" void netdata_conf_backwards_compatibility(void) { + FUNCTION_RUN_ONCE(); + // move [global] options to the [web] section inicfg_move(&netdata_config, CONFIG_SECTION_GLOBAL, "http port listen backlog", diff --git a/src/daemon/config/netdata-conf-db.c b/src/daemon/config/netdata-conf-db.c index 51e2102307f2d5..c06165d212148e 100644 --- a/src/daemon/config/netdata-conf-db.c +++ b/src/daemon/config/netdata-conf-db.c @@ -27,9 +27,7 @@ size_t get_tier_grouping(size_t tier) { } static void netdata_conf_dbengine_pre_logs(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); errno_clear(); @@ -143,7 +141,7 @@ void netdata_conf_dbengine_init(const char *hostname) { dbengine_out_of_memory_protection = 0; // will be calculated below 
OS_SYSTEM_MEMORY sm = os_system_memory(true); - if(sm.ram_total_bytes && sm.ram_available_bytes && sm.ram_total_bytes > sm.ram_available_bytes) { + if(OS_SYSTEM_MEMORY_OK(sm) && sm.ram_total_bytes > sm.ram_available_bytes) { // calculate the default out of memory protection size uint64_t keep_free = sm.ram_total_bytes / 10; if(keep_free > 5ULL * 1024 * 1024 * 1024) @@ -340,9 +338,7 @@ void netdata_conf_dbengine_init(const char *hostname) { } void netdata_conf_section_db(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); // ------------------------------------------------------------------------ // get default database update frequency diff --git a/src/daemon/config/netdata-conf-directories.c b/src/daemon/config/netdata-conf-directories.c index 0a50292edb5eff..7a9d4ebcb5b7b2 100644 --- a/src/daemon/config/netdata-conf-directories.c +++ b/src/daemon/config/netdata-conf-directories.c @@ -10,9 +10,7 @@ static const char *get_varlib_subdir_from_config(const char *prefix, const char } void netdata_conf_section_directories(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); // ------------------------------------------------------------------------ // get system paths diff --git a/src/daemon/config/netdata-conf-global.c b/src/daemon/config/netdata-conf-global.c index d637724d754dbe..7fb1ba7f0c68c5 100644 --- a/src/daemon/config/netdata-conf-global.c +++ b/src/daemon/config/netdata-conf-global.c @@ -86,6 +86,10 @@ void libuv_initialize(void) { } void netdata_conf_section_global(void) { + FUNCTION_RUN_ONCE(); + + netdata_conf_section_directories(); + // ------------------------------------------------------------------------ // get the hostname @@ -99,8 +103,6 @@ void netdata_conf_section_global(void) { netdata_configured_hostname = inicfg_get(&netdata_config, CONFIG_SECTION_GLOBAL, "hostname", buf); netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname); - 
netdata_conf_section_directories(); - nd_profile_setup(); // required for configuring the database netdata_conf_section_db(); diff --git a/src/daemon/config/netdata-conf-logs.c b/src/daemon/config/netdata-conf-logs.c index b61623b5404794..3975e3d13d83a6 100644 --- a/src/daemon/config/netdata-conf-logs.c +++ b/src/daemon/config/netdata-conf-logs.c @@ -25,9 +25,9 @@ static void debug_flags_initialize(void) { } void netdata_conf_section_logs(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); + + netdata_conf_section_directories(); nd_log_set_facility(inicfg_get(&netdata_config, CONFIG_SECTION_LOGS, "facility", "daemon")); diff --git a/src/daemon/config/netdata-conf-profile.c b/src/daemon/config/netdata-conf-profile.c index 3dbbd2b22fb872..d423ce0730af27 100644 --- a/src/daemon/config/netdata-conf-profile.c +++ b/src/daemon/config/netdata-conf-profile.c @@ -31,13 +31,14 @@ ND_PROFILE nd_profile_detect_and_configure(bool recheck) { // required for detecting the profile stream_conf_load(); + netdata_conf_section_directories(); ND_PROFILE def_profile = ND_PROFILE_NONE; OS_SYSTEM_MEMORY mem = os_system_memory(true); size_t cpus = os_get_system_cpus_uncached(); - if(cpus <= 1 || (mem.ram_total_bytes && mem.ram_total_bytes < 1ULL * 1024 * 1024 * 1024)) + if(cpus <= 1 || (OS_SYSTEM_MEMORY_OK(mem) && mem.ram_total_bytes < 1ULL * 1024 * 1024 * 1024)) def_profile = ND_PROFILE_IOT; else if(stream_conf_is_parent(true)) @@ -94,15 +95,13 @@ ND_PROFILE nd_profile_detect_and_configure(bool recheck) { struct nd_profile_t nd_profile = { 0 }; void nd_profile_setup(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); ND_PROFILE profile = nd_profile_detect_and_configure(true); (void)profile; if(netdata_conf_is_iot()) { nd_profile.storage_tiers = 3; // MUST BE 1 nd_profile.update_every = 1; // MUST BE 2 - nd_profile.malloc_arenas = 1; + nd_profile.malloc_arenas = 4; nd_profile.malloc_trim = 32 * 1024; 
nd_profile.stream_sender_compression = ND_COMPRESSION_FASTEST; // web server threads = 6 diff --git a/src/daemon/config/netdata-conf-ssl.c b/src/daemon/config/netdata-conf-ssl.c index 04501cc75f10aa..0466f0aef0739c 100644 --- a/src/daemon/config/netdata-conf-ssl.c +++ b/src/daemon/config/netdata-conf-ssl.c @@ -53,7 +53,7 @@ const char *detect_libcurl_default_ca() { return NULL; } -static const char *detect_ca_path(void) { +static inline const char *detect_ca_path(void) { static const char *paths[] = { "/etc/ssl/certs/ca-certificates.crt", // Debian, Ubuntu, Arch "/etc/pki/tls/certs/ca-bundle.crt", // RHEL, CentOS, Fedora @@ -74,6 +74,8 @@ static const char *detect_ca_path(void) { } void netdata_conf_ssl(void) { + FUNCTION_RUN_ONCE(); + netdata_ssl_initialize_openssl(); #if 0 diff --git a/src/daemon/config/netdata-conf-web.c b/src/daemon/config/netdata-conf-web.c index 8b3cb64480c271..c02333f4a3c4be 100644 --- a/src/daemon/config/netdata-conf-web.c +++ b/src/daemon/config/netdata-conf-web.c @@ -42,9 +42,7 @@ static int make_dns_decision(const char *section_name, const char *config_name, extern struct netdata_static_thread *static_threads; void web_server_threading_selection(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); web_server_mode = web_server_mode_id(inicfg_get(&netdata_config, CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode))); @@ -58,9 +56,7 @@ void web_server_threading_selection(void) { } void netdata_conf_section_web(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); web_client_timeout = (int)inicfg_get_duration_seconds(&netdata_config, CONFIG_SECTION_WEB, "disconnect idle clients after", web_client_timeout); @@ -146,9 +142,7 @@ void netdata_conf_section_web(void) { } void netdata_conf_web_security_init(void) { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, 
"%s/ssl/key.pem",netdata_configured_user_config_dir); diff --git a/src/daemon/config/netdata-conf.c b/src/daemon/config/netdata-conf.c index 4a85c3315e015e..f5c4e6184d0d62 100644 --- a/src/daemon/config/netdata-conf.c +++ b/src/daemon/config/netdata-conf.c @@ -4,9 +4,7 @@ #include "daemon/common.h" bool netdata_conf_load(char *filename, char overwrite_used, const char **user) { - static bool run = false; - if(run) return false; - run = true; + FUNCTION_RUN_ONCE_RET(false); errno_clear(); @@ -35,6 +33,7 @@ bool netdata_conf_load(char *filename, char overwrite_used, const char **user) { } netdata_conf_backwards_compatibility(); + netdata_conf_section_directories(); netdata_conf_section_global_run_as_user(user); libuv_initialize(); return ret; diff --git a/src/daemon/daemon-service.c b/src/daemon/daemon-service.c index 3898ec7f145e2c..9f1d30202bd2a7 100644 --- a/src/daemon/daemon-service.c +++ b/src/daemon/daemon-service.c @@ -90,7 +90,7 @@ bool service_running(SERVICE_TYPE service) { if (sth->type == SERVICE_THREAD_TYPE_NETDATA) cancelled = nd_thread_signaled_to_cancel(); - return !sth->stop_immediately && !netdata_exit && !cancelled; + return !sth->stop_immediately && !exit_initiated && !cancelled; } void service_signal_exit(SERVICE_TYPE service) { diff --git a/src/daemon/daemon-service.h b/src/daemon/daemon-service.h index 06ba521295e7cd..da3fbbdf3e7f50 100644 --- a/src/daemon/daemon-service.h +++ b/src/daemon/daemon-service.h @@ -19,7 +19,8 @@ typedef enum { SERVICE_CONTEXT = (1 << 10), SERVICE_ANALYTICS = (1 << 11), SERVICE_EXPORTERS = (1 << 12), - SERVICE_HTTPD = (1 << 13) + SERVICE_HTTPD = (1 << 13), + SERVICE_SYSTEMD = (1 << 14), } SERVICE_TYPE; typedef enum { diff --git a/src/daemon/daemon-shutdown.c b/src/daemon/daemon-shutdown.c index f858ba62e68448..9c261f66ace0ed 100644 --- a/src/daemon/daemon-shutdown.c +++ b/src/daemon/daemon-shutdown.c @@ -2,6 +2,7 @@ #include "daemon-shutdown.h" #include "daemon-service.h" +#include "daemon-status-file.h" #include 
"daemon/daemon-shutdown-watcher.h" #include "static_threads.h" #include "common.h" @@ -26,6 +27,21 @@ void web_client_cache_destroy(void); extern struct netdata_static_thread *static_threads; +void netdata_log_exit_reason(void) { + CLEAN_BUFFER *wb = buffer_create(0, NULL); + EXIT_REASON_2buffer(wb, exit_initiated, ", "); + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_UUID(NDF_MESSAGE_ID, &netdata_exit_msgid), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + nd_log(NDLS_DAEMON, is_exit_reason_normal(exit_initiated) ? NDLP_NOTICE : NDLP_CRIT, + "NETDATA SHUTDOWN: initializing shutdown with code due to: %s", + buffer_tostring(wb)); +} + void cancel_main_threads(void) { nd_log_limits_unlimited(); @@ -146,8 +162,25 @@ static void rrdeng_flush_everything_and_wait(bool wait_flush, bool wait_collecto } #endif -void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { - netdata_exit = 1; +void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data) { + exit_initiated_set(reason); + int ret = is_exit_reason_normal(exit_initiated) ? 0 : 1; + + // don't recurse (due to a fatal, while exiting) + static bool run = false; + if(run) { + nd_log(NDLS_DAEMON, NDLP_ERR, "EXIT: Recursion detected. 
Exiting immediately."); + exit(ret); + } + run = true; + daemon_status_file_save(DAEMON_STATUS_EXITING); + + nd_log_limits_unlimited(); + netdata_log_exit_reason(); + + watcher_thread_start(); + usec_t shutdown_start_time = now_monotonic_usec(); + watcher_shutdown_begin(); #ifdef ENABLE_DBENGINE if(!ret && dbengine_enabled) @@ -155,12 +188,6 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re rrdeng_flush_everything_and_wait(false, false); #endif - usec_t shutdown_start_time = now_monotonic_usec(); - watcher_shutdown_begin(); - - nd_log_limits_unlimited(); - netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); - // send the stat from our caller analytics_statistic_t statistic = { action, action_result, action_data }; analytics_statistic_send(&statistic); @@ -169,11 +196,6 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"}; analytics_statistic_send(&statistic); - char agent_crash_file[FILENAME_MAX + 1]; - char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; - snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); - snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); - (void) rename(agent_crash_file, agent_incomplete_shutdown_file); watcher_step_complete(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); netdata_main_spawn_server_cleanup(); @@ -294,13 +316,14 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re netdata_ssl_cleanup(); watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); - (void) unlink(agent_incomplete_shutdown_file); watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); watcher_shutdown_end(); watcher_thread_stop(); curl_global_cleanup(); + daemon_status_file_save(DAEMON_STATUS_EXITED); + #ifdef OS_WINDOWS return; #endif @@ -316,6 +339,9 
@@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re exit(ret); } #else - exit(ret); + if(ret) + _exit(ret); + else + exit(ret); #endif } diff --git a/src/daemon/daemon-status-file.c b/src/daemon/daemon-status-file.c new file mode 100644 index 00000000000000..da4b7a8d41668e --- /dev/null +++ b/src/daemon/daemon-status-file.c @@ -0,0 +1,652 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include "daemon-status-file.h" + +#include +#include +#include +#include + +#define STATUS_FILENAME "status-netdata.json" +#define STATUS_FILENAME_TMP "status-netdata.json.tmp" + +ENUM_STR_MAP_DEFINE(DAEMON_STATUS) = { + { DAEMON_STATUS_NONE, "none"}, + { DAEMON_STATUS_INITIALIZING, "initializing"}, + { DAEMON_STATUS_RUNNING, "running"}, + { DAEMON_STATUS_EXITING, "exiting"}, + { DAEMON_STATUS_EXITED, "exited"}, + + // terminator + { 0, NULL }, +}; +ENUM_STR_DEFINE_FUNCTIONS(DAEMON_STATUS, DAEMON_STATUS_NONE, "none"); + +ENUM_STR_MAP_DEFINE(DAEMON_OS_TYPE) = { + {DAEMON_OS_TYPE_UNKNOWN, "unknown"}, + {DAEMON_OS_TYPE_LINUX, "linux"}, + {DAEMON_OS_TYPE_FREEBSD, "freebsd"}, + {DAEMON_OS_TYPE_MACOS, "macos"}, + {DAEMON_OS_TYPE_WINDOWS, "windows"}, + + // terminator + { 0, NULL }, +}; +ENUM_STR_DEFINE_FUNCTIONS(DAEMON_OS_TYPE, DAEMON_OS_TYPE_UNKNOWN, "unknown"); + +static DAEMON_STATUS_FILE last_session_status = { 0 }; +static DAEMON_STATUS_FILE session_status = { 0 }; + +// -------------------------------------------------------------------------------------------------------------------- +// json generation + +static void daemon_status_file_to_json(BUFFER *wb, DAEMON_STATUS_FILE *ds) { + buffer_json_member_add_datetime_rfc3339(wb, "@timestamp", ds->timestamp_ut, true); // ECS + buffer_json_member_add_uint64(wb, "version", 1); // custom + + buffer_json_member_add_object(wb, "agent"); // ECS + { + buffer_json_member_add_uuid(wb, "id", ds->host_id.uuid); // ECS + buffer_json_member_add_uuid_compact(wb, "ephemeral_id", 
ds->invocation.uuid); // ECS + buffer_json_member_add_string(wb, "version", ds->version); // ECS + + buffer_json_member_add_time_t(wb, "uptime", ds->uptime); // custom + + buffer_json_member_add_uuid(wb, "ND_node_id", ds->node_id.uuid); // custom + buffer_json_member_add_uuid(wb, "ND_claim_id", ds->claim_id.uuid); // custom + + ND_PROFILE_2json(wb, "ND_profile", ds->profile); // custom + buffer_json_member_add_string(wb, "ND_status", DAEMON_STATUS_2str(ds->status)); // custom + EXIT_REASON_2json(wb, "ND_exit_reason", ds->exit_reason); // custom + + buffer_json_member_add_object(wb, "ND_timings"); // custom + { + buffer_json_member_add_time_t(wb, "init", ds->timings.init); + buffer_json_member_add_time_t(wb, "exit", ds->timings.exit); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "host"); // ECS + { + buffer_json_member_add_object(wb, "boot"); // ECS + { + buffer_json_member_add_uuid(wb, "id", ds->boot_id.uuid); // ECS + } + buffer_json_object_close(wb); + buffer_json_member_add_time_t(wb, "uptime", ds->boottime); // ECS + + buffer_json_member_add_object(wb, "memory"); // custom + if(OS_SYSTEM_MEMORY_OK(ds->memory)) { + buffer_json_member_add_uint64(wb, "total", ds->memory.ram_total_bytes); + buffer_json_member_add_uint64(wb, "free", ds->memory.ram_available_bytes); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "disk"); // ECS + { + buffer_json_member_add_object(wb, "db"); + if (OS_SYSTEM_DISK_SPACE_OK(ds->var_cache)) { + buffer_json_member_add_uint64(wb, "total", ds->var_cache.total_bytes); + buffer_json_member_add_uint64(wb, "free", ds->var_cache.free_bytes); + buffer_json_member_add_uint64(wb, "inodes_total", ds->var_cache.total_inodes); + buffer_json_member_add_uint64(wb, "inodes_free", ds->var_cache.free_inodes); + buffer_json_member_add_boolean(wb, "read_only", ds->var_cache.is_read_only); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); + } + 
buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "os"); // ECS + { + buffer_json_member_add_string(wb, "type", DAEMON_OS_TYPE_2str(ds->os_type)); // ECS + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "fatal"); + { + buffer_json_member_add_uint64(wb, "line", ds->fatal.line); + buffer_json_member_add_string_or_empty(wb, "filename", ds->fatal.filename); + buffer_json_member_add_string_or_empty(wb, "function", ds->fatal.function); + buffer_json_member_add_string_or_empty(wb, "message", ds->fatal.message); + buffer_json_member_add_string_or_empty(wb, "stack_trace", ds->fatal.stack_trace); + } + buffer_json_object_close(wb); +} + +// -------------------------------------------------------------------------------------------------------------------- +// json parsing + +static bool daemon_status_file_from_json(json_object *jobj, void *data, BUFFER *error) { + char path[1024]; path[0] = '\0'; + + DAEMON_STATUS_FILE *ds = data; + char datetime[RFC3339_MAX_LENGTH]; datetime[0] = '\0'; + + // change management, version to know which fields to expect + uint64_t version = 0; + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "version", version, error, true); + + bool required = false; // allow missing fields and values + + // Parse timestamp + JSONC_PARSE_TXT2CHAR_OR_ERROR_AND_RETURN(jobj, path, "@timestamp", datetime, error, required); + if(datetime[0]) + ds->timestamp_ut = rfc3339_parse_ut(datetime, NULL); + + // Parse agent object + JSONC_PARSE_SUBOBJECT(jobj, path, "agent", error, required, { + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "id", ds->host_id.uuid, error, required); + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "ephemeral_id", ds->invocation.uuid, error, required); + JSONC_PARSE_TXT2CHAR_OR_ERROR_AND_RETURN(jobj, path, "version", ds->version, error, required); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "uptime", ds->uptime, error, required); + 
JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, "ND_profile", ND_PROFILE_2id_one, ds->profile, error, required); + JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, "ND_status", DAEMON_STATUS_2id, ds->status, error, required); + JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, "ND_exit_reason", EXIT_REASON_2id_one, ds->exit_reason, error, required); + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "ND_node_id", ds->node_id.uuid, error, required); + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "ND_claim_id", ds->claim_id.uuid, error, required); + + JSONC_PARSE_SUBOBJECT(jobj, path, "ND_timings", error, required, { + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "init", ds->timings.init, error, required); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "exit", ds->timings.exit, error, required); + }); + }); + + // Parse host object + JSONC_PARSE_SUBOBJECT(jobj, path, "host", error, required, { + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "uptime", ds->boottime, error, required); + + JSONC_PARSE_SUBOBJECT(jobj, path, "boot", error, required, { + JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "id", ds->boot_id.uuid, error, required); + }); + + JSONC_PARSE_SUBOBJECT(jobj, path, "memory", error, required, { + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "total", ds->memory.ram_total_bytes, error, required); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "free", ds->memory.ram_available_bytes, error, required); + if(!OS_SYSTEM_MEMORY_OK(ds->memory)) + ds->memory = OS_SYSTEM_MEMORY_EMPTY; + }); + + JSONC_PARSE_SUBOBJECT(jobj, path, "disk", error, required, { + JSONC_PARSE_SUBOBJECT(jobj, path, "db", error, required, { + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "total", ds->var_cache.total_bytes, error, false); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "free", ds->var_cache.free_bytes, error, false); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "inodes_total", 
ds->var_cache.total_inodes, error, false); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "inodes_free", ds->var_cache.free_inodes, error, false); + JSONC_PARSE_BOOL_OR_ERROR_AND_RETURN(jobj, path, "read_only", ds->var_cache.is_read_only, error, false); + if(!OS_SYSTEM_DISK_SPACE_OK(ds->var_cache)) + ds->var_cache = OS_SYSTEM_DISK_SPACE_EMPTY; + }); + }); + }); + + // Parse os object + JSONC_PARSE_SUBOBJECT(jobj, path, "os", error, required, { + JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, "type", DAEMON_OS_TYPE_2id, ds->os_type, error, required); + }); + + // Parse fatal object + JSONC_PARSE_SUBOBJECT(jobj, path, "fatal", error, required, { + JSONC_PARSE_TXT2STRDUPZ_OR_ERROR_AND_RETURN(jobj, path, "filename", ds->fatal.filename, error, required); + JSONC_PARSE_TXT2STRDUPZ_OR_ERROR_AND_RETURN(jobj, path, "function", ds->fatal.function, error, required); + JSONC_PARSE_TXT2STRDUPZ_OR_ERROR_AND_RETURN(jobj, path, "message", ds->fatal.message, error, required); + JSONC_PARSE_TXT2STRDUPZ_OR_ERROR_AND_RETURN(jobj, path, "stack_trace", ds->fatal.stack_trace, error, required); + JSONC_PARSE_UINT64_OR_ERROR_AND_RETURN(jobj, path, "line", ds->fatal.line, error, required); + }); + + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- +// get the current status + +static DAEMON_STATUS_FILE daemon_status_file_get(DAEMON_STATUS status) { + usec_t now_ut = now_realtime_usec(); + +#if defined(OS_LINUX) + session_status.os_type = DAEMON_OS_TYPE_LINUX; +#elif defined(OS_FREEBSD) + session_status.os_type = DAEMON_OS_TYPE_FREEBSD; +#elif defined(OS_MACOS) + session_status.os_type = DAEMON_OS_TYPE_MACOS; +#elif defined(OS_WINDOWS) + session_status.os_type = DAEMON_OS_TYPE_WINDOWS; +#endif + + if(session_status.status == DAEMON_STATUS_INITIALIZING && status == DAEMON_STATUS_RUNNING) + session_status.timings.init = (time_t)((now_ut - session_status.timestamp_ut + USEC_PER_SEC/2) / 
USEC_PER_SEC); + + if(session_status.status == DAEMON_STATUS_EXITING && status == DAEMON_STATUS_EXITED) + session_status.timings.exit = (time_t)((now_ut - session_status.timestamp_ut + USEC_PER_SEC/2) / USEC_PER_SEC); + + strncpyz(session_status.version, NETDATA_VERSION, sizeof(session_status.version) - 1); + + session_status.boot_id = os_boot_id(); + if(!UUIDeq(session_status.boot_id, last_session_status.boot_id) && os_boot_ids_match(session_status.boot_id, last_session_status.boot_id)) { + // there is a slight difference in boot_id, but it is still the same boot + // copy the last boot_id + session_status.boot_id = last_session_status.boot_id; + } + + session_status.boottime = now_boottime_sec(); + session_status.uptime = now_realtime_sec() - netdata_start_time; + session_status.timestamp_ut = now_ut; + session_status.invocation = nd_log_get_invocation_id(); + + session_status.claim_id = claim_id_get_uuid(); + + if(localhost) { + session_status.host_id = localhost->host_id; + session_status.node_id = localhost->node_id; + } + else if(!UUIDiszero(last_session_status.host_id)) + session_status.host_id = last_session_status.host_id; + else { + const char *machine_guid = registry_get_this_machine_guid(); + if(machine_guid && *machine_guid) { + if (uuid_parse_flexi(machine_guid, session_status.host_id.uuid) != 0) + session_status.host_id = UUID_ZERO; + } + else + session_status.host_id = UUID_ZERO; + } + + if(UUIDiszero(session_status.claim_id)) + session_status.claim_id = last_session_status.claim_id; + if(UUIDiszero(session_status.node_id)) + session_status.node_id = last_session_status.node_id; + if(UUIDiszero(session_status.host_id)) + session_status.host_id = last_session_status.host_id; + + session_status.exit_reason = exit_initiated; + session_status.profile = nd_profile_detect_and_configure(false); + + if(status != DAEMON_STATUS_NONE) + session_status.status = status; + + session_status.memory = os_system_memory(true); + session_status.var_cache = 
os_disk_space(netdata_configured_cache_dir); + + return session_status; +} + +// -------------------------------------------------------------------------------------------------------------------- +// file helpers + +// List of fallback directories to try +static const char *status_file_fallbacks[] = { + "/tmp", + "/run", + "/var/run", +}; + +static bool check_status_file(const char *directory, char *filename, size_t filename_size, time_t *mtime) { + if(!directory || !*directory) + return false; + + snprintfz(filename, filename_size, "%s/%s", directory, STATUS_FILENAME); + + // Get file metadata + OS_FILE_METADATA metadata = os_get_file_metadata(filename); + if (!OS_FILE_METADATA_OK(metadata)) + return false; + + *mtime = metadata.modified_time; + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- +// load a saved status + +static bool load_status_file(const char *filename, DAEMON_STATUS_FILE *status) { + FILE *fp = fopen(filename, "r"); + if (!fp) + return false; + + CLEAN_BUFFER *wb = buffer_create(0, NULL); + CLEAN_BUFFER *error = buffer_create(0, NULL); + + // Get file size + fseek(fp, 0, SEEK_END); + long file_size = ftell(fp); + fseek(fp, 0, SEEK_SET); + + // Read the file + buffer_need_bytes(wb, file_size + 1); + size_t read_bytes = fread(wb->buffer, 1, file_size, fp); + fclose(fp); + + if (read_bytes == 0) + return false; + + wb->buffer[read_bytes] = '\0'; + wb->len = read_bytes; + + // Parse the JSON + return json_parse_payload_or_error(wb, error, daemon_status_file_from_json, status) == HTTP_RESP_OK; +} + +DAEMON_STATUS_FILE daemon_status_file_load(void) { + DAEMON_STATUS_FILE status = {0}; + char newest_filename[FILENAME_MAX] = ""; + char current_filename[FILENAME_MAX]; + time_t newest_mtime = 0, current_mtime; + + // Check primary directory first + if(check_status_file(netdata_configured_cache_dir, current_filename, sizeof(current_filename), &current_mtime)) { +
strncpyz(newest_filename, current_filename, sizeof(newest_filename) - 1); + newest_mtime = current_mtime; + } + + // Check each fallback location + for(size_t i = 0; i < _countof(status_file_fallbacks); i++) { + if(check_status_file(status_file_fallbacks[i], current_filename, sizeof(current_filename), &current_mtime) && + (!*newest_filename || current_mtime > newest_mtime)) { + strncpyz(newest_filename, current_filename, sizeof(newest_filename) - 1); + newest_mtime = current_mtime; + } + } + + // Load the newest file found + if(*newest_filename) { + if(!load_status_file(newest_filename, &status)) + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to load newest status file: %s", newest_filename); + } + else + nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot find a status file in any location"); + + return status; +} + +// -------------------------------------------------------------------------------------------------------------------- +// save the current status + +static bool save_status_file(const char *directory, const char *content, size_t content_size) { + if(!directory || !*directory) + return false; + + char filename[FILENAME_MAX]; + char temp_filename[FILENAME_MAX]; + + snprintfz(filename, sizeof(filename), "%s/%s", directory, STATUS_FILENAME); + snprintfz(temp_filename, sizeof(temp_filename), "%s/%s", directory, STATUS_FILENAME_TMP); + + FILE *fp = fopen(temp_filename, "w"); + if (!fp) + return false; + + bool ok = fwrite(content, 1, content_size, fp) == content_size; + fclose(fp); + + if (!ok) { + unlink(filename); + unlink(temp_filename); + return false; + } + + if (chmod(temp_filename, 0664) != 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot set permissions on status file '%s'", temp_filename); + unlink(temp_filename); + return false; + } + + if (rename(temp_filename, filename) != 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Cannot rename status file '%s' to '%s'", temp_filename, filename); + unlink(temp_filename); + return false; + } + + return true; +} + +void
// Payload handed to post_status_file_thread(); the thread frees `cause`, `msg`
// and the structure itself once the report has been posted.
struct post_status_file_thread_data {
    const char *cause;                  // posted as the "exit_cause" JSON member
    const char *msg;                    // posted as the "message" JSON member
    ND_LOG_FIELD_PRIORITY priority;     // posted as the "priority" JSON member
    DAEMON_STATUS_FILE status;          // copy of the status serialized into the report
};
&d->status); + buffer_json_finalize(wb); + + const char *json_data = buffer_tostring(wb); + + CURL *curl = curl_easy_init(); + if(!curl) + return; + + curl_easy_setopt(curl, CURLOPT_URL, "https://agent-events.netdata.cloud/agent-events"); + curl_easy_setopt(curl, CURLOPT_POST, 1L); + curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_data); + struct curl_slist *headers = NULL; + headers = curl_slist_append(headers, "Content-Type: application/json"); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); + + CURLcode rc = curl_easy_perform(curl); + (void)rc; + + curl_easy_cleanup(curl); + curl_slist_free_all(headers); +} + +void *post_status_file_thread(void *ptr) { + struct post_status_file_thread_data *d = (struct post_status_file_thread_data *)ptr; + post_status_file(d); + freez((void *)d->cause); + freez((void *)d->msg); + freez(d); + return NULL; +} + +// -------------------------------------------------------------------------------------------------------------------- +// check last status on startup and post crash report + +void daemon_status_file_check_crash(void) { + last_session_status = daemon_status_file_load(); + daemon_status_file_save(DAEMON_STATUS_INITIALIZING); + ND_LOG_FIELD_PRIORITY pri = NDLP_NOTICE; + + bool new_version = strcmp(last_session_status.version, session_status.version) != 0; + bool post_crash_report = false; + bool disable_crash_report = false; + bool dump_json = true; + const char *msg, *cause; + switch(last_session_status.status) { + default: + case DAEMON_STATUS_NONE: + // probably a previous version of netdata was running + cause = "no last status"; + msg = "No status found for the previous Netdata session"; + disable_crash_report = true; + break; + + case DAEMON_STATUS_EXITED: + if(last_session_status.exit_reason == EXIT_REASON_NONE) { + cause = "exit no reason"; + msg = "Netdata was last stopped gracefully (no exit reason set)"; + if(!last_session_status.timestamp_ut) + dump_json = false; + } + else 
if(!is_exit_reason_normal(last_session_status.exit_reason)) { + cause = "exit on fatal"; + msg = "Netdata was last stopped gracefully (encountered an error)"; + pri = NDLP_ERR; + post_crash_report = true; + } + else if(last_session_status.exit_reason & EXIT_REASON_SYSTEM_SHUTDOWN) { + cause = "exit on system shutdown"; + msg = "Netdata has gracefully stopped due to system shutdown"; + } + else if(last_session_status.exit_reason & EXIT_REASON_UPDATE) { + cause = "exit to update"; + msg = "Netdata has gracefully restarted to update to a new version"; + } + else if(new_version) { + cause = "exit and updated"; + msg = "Netdata has gracefully restarted and updated to a new version"; + last_session_status.exit_reason |= EXIT_REASON_UPDATE; + } + else { + cause = "exit instructed"; + msg = "Netdata was last stopped gracefully (instructed to do so)"; + } + break; + + case DAEMON_STATUS_INITIALIZING: + cause = "crashed on start"; + msg = "Netdata was last killed/crashed while starting"; + pri = NDLP_ERR; + post_crash_report = true; + break; + + case DAEMON_STATUS_EXITING: + if(!is_exit_reason_normal(last_session_status.exit_reason)) { + cause = "crashed on fatal"; + msg = "Netdata was last killed/crashed while exiting after encountering an error"; + } + else if(last_session_status.exit_reason & EXIT_REASON_SYSTEM_SHUTDOWN) { + cause = "crashed on system shutdown"; + msg = "Netdata was last killed/crashed while exiting due to system shutdown"; + } + else if(new_version || (last_session_status.exit_reason & EXIT_REASON_UPDATE)) { + cause = "crashed on update"; + msg = "Netdata was last killed/crashed while exiting to update to a new version"; + } + else { + cause = "crashed on exit"; + msg = "Netdata was last killed/crashed while exiting (instructed to do so)"; + } + pri = NDLP_ERR; + post_crash_report = true; + break; + + case DAEMON_STATUS_RUNNING: { + if (!UUIDeq(session_status.boot_id, last_session_status.boot_id)) { + cause = "abnormal power off"; + msg = "The system was 
abnormally powered off while Netdata was running"; + pri = NDLP_CRIT; + } + else { + cause = "killed hard"; + msg = "Netdata was last killed/crashed while operating normally"; + pri = NDLP_CRIT; + post_crash_report = true; + } + break; + } + } + + CLEAN_BUFFER *wb = buffer_create(0, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + if(dump_json) + daemon_status_file_to_json(wb, &last_session_status); + buffer_json_finalize(wb); + + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_UUID(NDF_MESSAGE_ID, &netdata_startup_msgid), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + nd_log(NDLS_DAEMON, pri, + "Netdata Agent version '%s' is starting...\n" + "Last exit status: %s (%s):\n\n%s", + NETDATA_VERSION, msg, cause, buffer_tostring(wb)); + + if(!disable_crash_report && (analytics_check_enabled() || post_crash_report)) { + netdata_conf_ssl(); + + struct post_status_file_thread_data *d = calloc(1, sizeof(*d)); + d->cause = strdupz(cause); + d->msg = strdupz(msg); + d->status = last_session_status; + d->priority = pri; + nd_thread_create("post_status_file", NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_DEFAULT, post_status_file_thread, d); + } +} + +bool daemon_status_file_has_last_crashed(void) { + return last_session_status.status != DAEMON_STATUS_EXITED || !is_exit_reason_normal(last_session_status.exit_reason); +} + +bool daemon_status_file_was_incomplete_shutdown(void) { + return last_session_status.status == DAEMON_STATUS_EXITING; +} + +// -------------------------------------------------------------------------------------------------------------------- +// ng_log() hook for receiving fatal message information + +void daemon_status_file_register_fatal(const char *filename, const char *function, const char *message, const char *stack_trace, long line) { + static SPINLOCK spinlock = SPINLOCK_INITIALIZER; + spinlock_lock(&spinlock); + + if(session_status.fatal.filename || session_status.fatal.function || 
session_status.fatal.message || session_status.fatal.stack_trace) { + spinlock_unlock(&spinlock); + freez((void *)filename); + freez((void *)function); + freez((void *)message); + freez((void *)stack_trace); + return; + } + + session_status.fatal.filename = filename; + session_status.fatal.function = function; + session_status.fatal.message = message; + session_status.fatal.stack_trace = stack_trace; + session_status.fatal.line = line; + + spinlock_unlock(&spinlock); +} diff --git a/src/daemon/daemon-status-file.h b/src/daemon/daemon-status-file.h new file mode 100644 index 00000000000000..5c5b0196f76a39 --- /dev/null +++ b/src/daemon/daemon-status-file.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DAEMON_STATUS_FILE_H +#define NETDATA_DAEMON_STATUS_FILE_H + +#include "libnetdata/libnetdata.h" +#include "daemon/config/netdata-conf-profile.h" + +typedef enum { + DAEMON_STATUS_NONE, + DAEMON_STATUS_INITIALIZING, + DAEMON_STATUS_RUNNING, + DAEMON_STATUS_EXITING, + DAEMON_STATUS_EXITED, +} DAEMON_STATUS; +ENUM_STR_DEFINE_FUNCTIONS_EXTERN(DAEMON_STATUS); + +typedef enum { + DAEMON_OS_TYPE_UNKNOWN, + DAEMON_OS_TYPE_LINUX, + DAEMON_OS_TYPE_FREEBSD, + DAEMON_OS_TYPE_MACOS, + DAEMON_OS_TYPE_WINDOWS, +} DAEMON_OS_TYPE; +ENUM_STR_DEFINE_FUNCTIONS_EXTERN(DAEMON_OS_TYPE); + +typedef struct daemon_status_file { + char version[32]; // the netdata version + DAEMON_STATUS status; // the daemon status + EXIT_REASON exit_reason; // the exit reason (maybe empty) + ND_PROFILE profile; // the profile of the agent + DAEMON_OS_TYPE os_type; + + time_t boottime; // system boottime + time_t uptime; // netdata uptime + usec_t timestamp_ut; // the timestamp of the status file + + ND_UUID boot_id; // the boot id of the system + ND_UUID invocation; // the netdata invocation id generated the file + ND_UUID host_id; // the machine guid of the agent + ND_UUID node_id; // the Netdata Cloud node id of the agent + ND_UUID claim_id; // the Netdata Cloud claim id 
of the agent + + struct { + time_t init; + time_t exit; + } timings; + + OS_SYSTEM_MEMORY memory; + OS_SYSTEM_DISK_SPACE var_cache; + + struct { + long line; + const char *filename; + const char *function; + const char *stack_trace; + const char *message; + } fatal; +} DAEMON_STATUS_FILE; + +// loads the last status saved +DAEMON_STATUS_FILE daemon_status_file_load(void); + +// saves the current status +void daemon_status_file_save(DAEMON_STATUS status); + +// check for a crash +void daemon_status_file_check_crash(void); + +bool daemon_status_file_has_last_crashed(void); +bool daemon_status_file_was_incomplete_shutdown(void); + +void daemon_status_file_register_fatal(const char *filename, const char *function, const char *message, const char *stack_trace, long line); + +#endif //NETDATA_DAEMON_STATUS_FILE_H diff --git a/src/daemon/daemon-systemd-watcher.c b/src/daemon/daemon-systemd-watcher.c new file mode 100644 index 00000000000000..d71e81ba7d85cc --- /dev/null +++ b/src/daemon/daemon-systemd-watcher.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include "daemon-systemd-watcher.h" +#include "daemon-service.h" + +#ifdef ENABLE_SYSTEMD_DBUS + +#include + +/* Callback function to handle the PrepareForShutdown signal. + * The signal sends a boolean: true indicates that shutdown is starting, + * false indicates that a previously initiated shutdown was canceled. + */ +static int shutdown_event_handler(sd_bus_message *m, void *userdata __maybe_unused, sd_bus_error *ret_error __maybe_unused) { + int shutdown; + int r = sd_bus_message_read(m, "b", &shutdown); + if (r < 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "SYSTEMD DBUS: Failed to parse shutdown message: %s", + strerror(-r)); + return r; + } + + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "SYSTEMD DBUS: Received PrepareForShutdown signal: shutdown=%s", + shutdown ? 
"true" : "false"); + + if(shutdown) + netdata_cleanup_and_exit(EXIT_REASON_SYSTEM_SHUTDOWN, NULL, NULL, NULL); + + return 0; +} + +/* Callback function to handle the PrepareForSleep signal. + * The signal sends a boolean: true indicates that the system is preparing to suspend, + * false indicates that a previous suspend was canceled (i.e. resuming). + */ +static int suspend_event_handler(sd_bus_message *m, void *userdata __maybe_unused, sd_bus_error *ret_error __maybe_unused) { + int suspend; + int r = sd_bus_message_read(m, "b", &suspend); + if (r < 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "SYSTEMD DBUS: Failed to parse suspend message: %s", + strerror(-r)); + return r; + } + + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "SYSTEMD DBUS: Received PrepareForSleep signal: suspend=%s\n", + suspend ? "true (suspending)" : "false (resuming)"); + + // Here you can trigger your suspend/resume logic. + return 0; +} + +/* Function that sets up the sd-bus listener for shutdown and suspend events. + * This function blocks in a loop processing bus events. + */ +static void listen_for_systemd_dbus_events(void) { + sd_bus *bus = NULL; + sd_bus_slot *shutdown_slot = NULL; + sd_bus_slot *suspend_slot = NULL; + int r; + + // Connect to the system bus. + r = sd_bus_open_system(&bus); + if (r < 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "SYSTEMD DBUS: Failed to connect to system bus: %s", + strerror(-r)); + goto finish; + } + + // Add a match rule for the PrepareForShutdown signal on the login1 manager. + r = sd_bus_add_match( + bus, + &shutdown_slot, + "type='signal'," + "sender='org.freedesktop.login1'," + "interface='org.freedesktop.login1.Manager'," + "member='PrepareForShutdown'", + shutdown_event_handler, + NULL); + if (r < 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "SYSTEMD DBUS: Failed to add signal match for shutdown: %s", + strerror(-r)); + goto finish; + } + + // Add a match rule for the PrepareForSleep signal on the login1 manager. 
// Entry point of the static thread that watches systemd D-Bus events.
// `arg` is the thread's struct netdata_static_thread entry.
// When ENABLE_SYSTEMD_DBUS is not defined the thread only registers,
// unregisters and marks itself as exited.
void *systemd_watcher_thread(void *arg) {
    struct netdata_static_thread *static_thread = arg;

    service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);

#ifdef ENABLE_SYSTEMD_DBUS
    // blocks, processing bus events, until the listener returns
    listen_for_systemd_dbus_events();
#endif

    service_exits();
    worker_unregister();
    // mark this static thread as exited
    static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
    return NULL;
}
a/src/daemon/daemon.c +++ b/src/daemon/daemon.c @@ -58,7 +58,7 @@ static void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool rec fix_directory_file_permissions(dir, uid, gid, recursive); } -static void clean_directory(const char *dirname) +static inline void clean_directory(const char *dirname) { DIR *dir = opendir(dirname); if(!dir) return; @@ -75,6 +75,7 @@ static void clean_directory(const char *dirname) } static void prepare_required_directories(uid_t uid, gid_t gid) { + change_dir_ownership(os_run_dir(true), uid, gid, false); change_dir_ownership(netdata_configured_cache_dir, uid, gid, true); change_dir_ownership(netdata_configured_varlib_dir, uid, gid, false); change_dir_ownership(netdata_configured_log_dir, uid, gid, false); diff --git a/src/daemon/daemon.h b/src/daemon/daemon.h index 13ef1f64713e0a..e3505ef21f2f82 100644 --- a/src/daemon/daemon.h +++ b/src/daemon/daemon.h @@ -5,8 +5,6 @@ int become_daemon(int dont_fork, const char *user); -void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data); - void get_netdata_execution_path(void); extern char *pidfile; diff --git a/src/daemon/main.c b/src/daemon/main.c index bd2698e2161c2c..4ef63f53eb5f8f 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -2,7 +2,8 @@ #include "common.h" #include "buildinfo.h" -#include "daemon/daemon-shutdown-watcher.h" +#include "daemon-shutdown-watcher.h" +#include "daemon-status-file.h" #include "static_threads.h" #include "web/api/queries/backfill.h" @@ -22,7 +23,7 @@ #endif bool unittest_running = false; -int netdata_anonymous_statistics_enabled; +bool netdata_anonymous_statistics_enabled = true; int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; bool ieee754_doubles = false; @@ -228,6 +229,7 @@ int unittest_prepare_rrd(const char **user) { } int netdata_main(int argc, char **argv) { + libjudy_malloc_init(); string_init(); analytics_init(); @@ -749,21 +751,40 @@ int netdata_main(int argc, char 
**argv) { // initialize the log files nd_log_initialize(); - { - ND_LOG_STACK lgs[] = { - ND_LOG_FIELD_UUID(NDF_MESSAGE_ID, &netdata_startup_msgid), - ND_LOG_FIELD_END(), - }; - ND_LOG_STACK_PUSH(lgs); + nd_log_register_event_cb(daemon_status_file_register_fatal); + + netdata_conf_section_global(); // get hostname, host prefix, profile, etc + registry_init(); // for machine_guid, must be after netdata_conf_section_global() - netdata_log_info("Netdata agent version '%s' is starting", NETDATA_VERSION); + // initialize thread - this is required before the first nd_thread_create() + default_stacksize = netdata_threads_init(); + // musl default thread stack size is 128k, let's set it to a higher value to avoid random crashes + if (default_stacksize < 1 * 1024 * 1024) + default_stacksize = 1 * 1024 * 1024; + + // make sure we are the only instance running + { + const char *run_dir = os_run_dir(true); + if(!run_dir) { + netdata_log_error("Cannot get/create a run directory."); + exit(1); + } + netdata_log_info("Netdata run directory is '%s'", run_dir); + + char lock_file[FILENAME_MAX]; + snprintfz(lock_file, sizeof(lock_file), "%s/netdata.lock", run_dir); + FILE_LOCK lock = file_lock_get(lock_file); + if(!FILE_LOCK_OK(lock)) { + netdata_log_error("Cannot get exclusive lock on file '%s'. 
Is Netdata already running?", lock_file); + exit(1); + } } - // ---------------------------------------------------------------------------------------------------------------- - // global configuration + // status and crash/update/exit detection + exit_initiated_reset(); + daemon_status_file_check_crash(); netdata_conf_ssl(); - netdata_conf_section_global(); // Get execution path before switching user to avoid permission issues get_netdata_execution_path(); @@ -831,12 +852,6 @@ int netdata_main(int argc, char **argv) { delta_startup_time("initialize static threads"); - // setup threads configs - default_stacksize = netdata_threads_init(); - // musl default thread stack size is 128k, let's set it to a higher value to avoid random crashes - if (default_stacksize < 1 * 1024 * 1024) - default_stacksize = 1 * 1024 * 1024; - for (i = 0; static_threads[i].name != NULL ; i++) { struct netdata_static_thread *st = &static_threads[i]; @@ -909,7 +924,6 @@ int netdata_main(int argc, char **argv) { #endif netdata_main_spawn_server_init("plugins", argc, (const char **)argv); - watcher_thread_start(); // init sentry #ifdef ENABLE_SENTRY @@ -936,7 +950,7 @@ int netdata_main(int argc, char **argv) { // initialize internal registry delta_startup_time("initialize registry"); - registry_init(); + registry_load(); cloud_conf_init_after_registry(); netdata_random_session_id_generate(); @@ -945,7 +959,6 @@ int netdata_main(int argc, char **argv) { delta_startup_time("collecting system info"); - netdata_anonymous_statistics_enabled=-1; struct rrdhost_system_info *system_info = rrdhost_system_info_create(); rrdhost_system_info_detect(system_info); @@ -967,18 +980,6 @@ int netdata_main(int argc, char **argv) { } abort_on_fatal_enable(); - delta_startup_time("check for incomplete shutdown"); - - char agent_crash_file[FILENAME_MAX + 1]; - char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; - snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", 
netdata_configured_varlib_dir); - int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0); - snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); - int crash_detected = (unlink(agent_crash_file) == 0); - int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 444); - if (fd >= 0) - close(fd); - // ------------------------------------------------------------------------ // Claim netdata agent to a cloud endpoint @@ -1038,17 +1039,17 @@ int netdata_main(int argc, char **argv) { analytics_statistic_t start_statistic = { "START", "-", "-" }; analytics_statistic_send(&start_statistic); - if (crash_detected) { + if (daemon_status_file_has_last_crashed()) { analytics_statistic_t crash_statistic = { "CRASH", "-", "-" }; analytics_statistic_send(&crash_statistic); } - if (incomplete_shutdown_detected) { + if (daemon_status_file_was_incomplete_shutdown()) { analytics_statistic_t incomplete_shutdown_statistic = { "INCOMPLETE_SHUTDOWN", "-", "-" }; analytics_statistic_send(&incomplete_shutdown_statistic); } - //check if ANALYTICS needs to start - if (netdata_anonymous_statistics_enabled == 1) { + // check if ANALYTICS needs to start + if (netdata_anonymous_statistics_enabled) { for (i = 0; static_threads[i].name != NULL; i++) { if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) { struct netdata_static_thread *st = &static_threads[i]; @@ -1060,6 +1061,8 @@ int netdata_main(int argc, char **argv) { } webrtc_initialize(); + + daemon_status_file_save(DAEMON_STATUS_RUNNING); return 10; } diff --git a/src/daemon/pipename.c b/src/daemon/pipename.c index 70b6a25b4280cf..f16ad03bb09f6e 100644 --- a/src/daemon/pipename.c +++ b/src/daemon/pipename.c @@ -2,16 +2,27 @@ #include "pipename.h" -#include +#include "libnetdata/libnetdata.h" + +static const char *cached_pipename = NULL; const char *daemon_pipename(void) { + if(cached_pipename) + return cached_pipename; + const char *pipename = 
getenv("NETDATA_PIPENAME"); - if (pipename) + if (pipename) { + cached_pipename = strdupz(pipename); return pipename; + } -#ifdef _WIN32 - return "\\\\?\\pipe\\netdata-cli"; -#else - return "/tmp/netdata-ipc"; -#endif +//#if defined(OS_WINDOWS) +// cached_pipename = strdupz("\\\\?\\pipe\\netdata-cli"); +// return cached_pipename; +//#else + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/netdata.pipe", os_run_dir(false)); + cached_pipename = strdupz(filename); + return cached_pipename; +//#endif } diff --git a/src/daemon/pulse/pulse-daemon-memory-system.c b/src/daemon/pulse/pulse-daemon-memory-system.c index 3565773b73ee0f..b442561d29da2f 100644 --- a/src/daemon/pulse/pulse-daemon-memory-system.c +++ b/src/daemon/pulse/pulse-daemon-memory-system.c @@ -113,7 +113,7 @@ void pulse_daemon_memory_system_do(bool extended) { if(!extended) return; size_t glibc_mmaps = 0; - bool have_mallinfo = false; + bool have_mallinfo = false; (void)have_mallinfo; #ifdef HAVE_C_MALLINFO2 struct mallinfo2 mi = mallinfo2(); diff --git a/src/daemon/pulse/pulse-daemon-memory.c b/src/daemon/pulse/pulse-daemon-memory.c index 30e365c0ba32f7..f9772d86d160f4 100644 --- a/src/daemon/pulse/pulse-daemon-memory.c +++ b/src/daemon/pulse/pulse-daemon-memory.c @@ -275,7 +275,7 @@ void pulse_daemon_memory_do(bool extended __maybe_unused) { // ---------------------------------------------------------------------------------------------------------------- OS_SYSTEM_MEMORY sm = os_system_memory(true); - if (sm.ram_total_bytes && dbengine_out_of_memory_protection) { + if (OS_SYSTEM_MEMORY_OK(sm) && dbengine_out_of_memory_protection) { static RRDSET *st_memory_available = NULL; static RRDDIM *rd_available = NULL; diff --git a/src/daemon/pulse/pulse-workers.c b/src/daemon/pulse/pulse-workers.c index 6418699a4ba381..760d99dc601081 100644 --- a/src/daemon/pulse/pulse-workers.c +++ b/src/daemon/pulse/pulse-workers.c @@ -88,6 +88,8 @@ struct worker_utilization { double workers_cpu_max; 
double workers_cpu_total; + uint64_t memory_calls[WORKERS_MEMORY_CALL_MAX]; + struct worker_thread *threads; RRDSET *st_workers_time; @@ -113,6 +115,9 @@ struct worker_utilization { RRDSET *st_spinlocks_locks; RRDSET *st_spinlocks_spins; SPINLOCKS_JudyLSet spinlocks; + + RRDSET *st_memory_calls; + RRDDIM *rd_memory_calls[WORKERS_MEMORY_CALL_MAX]; }; static struct worker_utilization all_workers_utilization[] = { @@ -255,6 +260,46 @@ static void workers_total_spinlock_contention_chart(void) { } } +static void workers_total_memory_calls_chart(void) { + { + static RRDSET *st = NULL; + static RRDDIM *rd[WORKERS_MEMORY_CALL_MAX] = { NULL }; + uint64_t memory_calls[WORKERS_MEMORY_CALL_MAX] = { 0 }; + + if(unlikely(!st)) { + st = rrdset_create_localhost( + "netdata" + , "memory_calls_total" + , NULL + , "memory calls" + , "netdata.memory_calls_total" + , "Netdata Total Memory Calls" + , "calls" + , "netdata" + , "pulse" + , 920005 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + for (int j = 0; j < WORKERS_MEMORY_CALL_MAX; ++j) + rd[j] = rrddim_add(st, WORKERS_MEMORY_CALL_2str(j), NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + for(size_t i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + + for (int j = 0; j < WORKERS_MEMORY_CALL_MAX; ++j) + memory_calls[j] += wu->memory_calls[j]; + } + + for (int j = 0; j < WORKERS_MEMORY_CALL_MAX; ++j) + rrddim_set_by_pointer(st, rd[j], (collected_number)memory_calls[j]); + + rrdset_done(st); + } +} + static void workers_total_cpu_utilization_chart(void) { size_t i, cpu_enabled = 0; for(i = 0; all_workers_utilization[i].name ;i++) @@ -653,6 +698,43 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { rrdset_done(wu->st_spinlocks_spins); } + // ---------------------------------------------------------------------- + // memory calls + + { + if(unlikely(!wu->st_memory_calls)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, 
RRD_ID_LENGTH_MAX, "workers_memory_calls_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.memory_calls", wu->name_lowercase); + + wu->st_memory_calls = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Memory Calls" + , "calls" + , "netdata" + , "pulse" + , wu->priority + 8 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + for(size_t i = 0; i < WORKERS_MEMORY_CALL_MAX; i++) { + if(!wu->rd_memory_calls[i]) + wu->rd_memory_calls[i] = rrddim_add(wu->st_memory_calls, WORKERS_MEMORY_CALL_2str(i), NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrddim_set_by_pointer(wu->st_memory_calls, wu->rd_memory_calls[i], (collected_number)wu->memory_calls[i]); + } + + rrdset_done(wu->st_memory_calls); + } + // ---------------------------------------------------------------------- // custom metric types WORKER_METRIC_ABSOLUTE @@ -815,6 +897,8 @@ static void workers_utilization_reset_statistics(struct worker_utilization *wu) wt->enabled = false; wt->cpu_enabled = false; } + + memset(wu->memory_calls, 0, sizeof(wu->memory_calls)); } #define TASK_STAT_PREFIX "/proc/self/task/" @@ -923,6 +1007,7 @@ static void worker_utilization_charts_callback(void *ptr , const char *spinlock_functions[] , size_t *spinlock_locks , size_t *spinlock_spins + , uint64_t *memory_calls ) { struct worker_utilization *wu = (struct worker_utilization *)ptr; @@ -1024,6 +1109,12 @@ static void worker_utilization_charts_callback(void *ptr wusp->locks += spinlock_locks[i]; wusp->spins += spinlock_spins[i]; } + + // ---------------------------------------------------------------------------------------------------------------- + // memory calls + + for(size_t i = 0; i < WORKERS_MEMORY_CALL_MAX ;i++) + wu->memory_calls[i] += memory_calls[i]; } void pulse_workers_cleanup(void) { @@ -1082,4 +1173,5 @@ void pulse_workers_do(bool extended) { workers_total_cpu_utilization_chart(); 
workers_total_spinlock_contention_chart(); + workers_total_memory_calls_chart(); } diff --git a/src/daemon/signals.c b/src/daemon/signals.c index 163f92ad8bc29d..19ca1d2bbea177 100644 --- a/src/daemon/signals.c +++ b/src/daemon/signals.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "common.h" +#include "daemon/daemon-status-file.h" typedef enum signal_action { NETDATA_SIGNAL_END_OF_LIST, @@ -16,17 +17,21 @@ static struct { const char *name; // the name of the signal size_t count; // the number of signals received SIGNAL_ACTION action; // the action to take + EXIT_REASON reason; } signals_waiting[] = { - { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE }, - { SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, - { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, - { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, - { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS }, - { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, - { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, - - // terminator - { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST } + { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE, EXIT_REASON_NONE }, + { SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY, EXIT_REASON_SIGINT }, + { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY, EXIT_REASON_SIGQUIT }, + { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY, EXIT_REASON_SIGTERM }, + { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS, EXIT_REASON_NONE }, + { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH, EXIT_REASON_NONE }, + { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL, EXIT_REASON_SIGBUS }, + { SIGSEGV, "SIGSEGV", 0, NETDATA_SIGNAL_FATAL, EXIT_REASON_SIGSEGV }, + { SIGFPE, "SIGFPE", 0, NETDATA_SIGNAL_FATAL, EXIT_REASON_SIGFPE }, + { SIGILL, "SIGILL", 0, NETDATA_SIGNAL_FATAL, EXIT_REASON_SIGILL }, + + // terminator + { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST, 0 } }; static void signal_handler(int signo) { @@ -108,6 +113,7 @@ void nd_process_signals(void) { // is delivered 
that either terminates the process or causes the invocation // of a signal-catching function. if(pause() == -1 && errno == EINTR) { + daemon_status_file_save(DAEMON_STATUS_NONE); errno_clear(); // loop once, but keep looping while signals are coming in @@ -144,11 +150,12 @@ void nd_process_signals(void) { nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name); commands_exit(); - netdata_cleanup_and_exit(0, NULL, NULL, NULL); + netdata_cleanup_and_exit(signals_waiting[i].reason, NULL, NULL, NULL); exit(0); break; case NETDATA_SIGNAL_FATAL: + exit_initiated_set(signals_waiting[i].reason); fatal("SIGNAL: Received %s. netdata now exits.", name); break; diff --git a/src/daemon/static_threads.c b/src/daemon/static_threads.c index 77694165e7ddfe..5dab8de9aa7dbe 100644 --- a/src/daemon/static_threads.c +++ b/src/daemon/static_threads.c @@ -2,6 +2,7 @@ #include "common.h" #include "web/api/queries/backfill.h" +#include "daemon-systemd-watcher.h" void *aclk_main(void *ptr); void *analytics_main(void *ptr); @@ -201,6 +202,16 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = backfill_thread }, + { + .name = "SDBUSWATCHER", + .config_section = NULL, + .config_name = NULL, + .enable_routine = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = systemd_watcher_thread + }, // terminator { diff --git a/src/daemon/winsvc.cc b/src/daemon/winsvc.cc index a56f5eb7c9b15f..eeb080e3120c3e 100644 --- a/src/daemon/winsvc.cc +++ b/src/daemon/winsvc.cc @@ -87,7 +87,7 @@ static HANDLE CreateEventHandle(const char *msg) static void *call_netdata_cleanup(void *arg) { - UNUSED(arg); + DWORD controlCode = *((DWORD *)arg); // Wait until we have to stop the service netdata_service_log("Cleanup thread waiting for stop event..."); @@ -95,7 +95,20 @@ static void *call_netdata_cleanup(void *arg) // Stop the agent netdata_service_log("Running netdata cleanup..."); - 
netdata_cleanup_and_exit(0, NULL, NULL, NULL); + EXIT_REASON reason; + switch(controlCode) { + case SERVICE_CONTROL_SHUTDOWN: + reason = (EXIT_REASON)(EXIT_REASON_SERVICE_STOP|EXIT_REASON_SYSTEM_SHUTDOWN); + break; + + case SERVICE_CONTROL_STOP: + // fall-through + + default: + reason = EXIT_REASON_SERVICE_STOP; + break; + } + netdata_cleanup_and_exit(reason, NULL, NULL, NULL); // Close event handle netdata_service_log("Closing stop event handle..."); @@ -112,6 +125,7 @@ static void WINAPI ServiceControlHandler(DWORD controlCode) { switch (controlCode) { + case SERVICE_CONTROL_SHUTDOWN: case SERVICE_CONTROL_STOP: { if (svc_status.dwCurrentState != SERVICE_RUNNING) @@ -126,7 +140,7 @@ static void WINAPI ServiceControlHandler(DWORD controlCode) netdata_service_log("Creating cleanup thread..."); char tag[NETDATA_THREAD_TAG_MAX + 1]; snprintfz(tag, NETDATA_THREAD_TAG_MAX, "%s", "CLEANUP"); - cleanup_thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_JOINABLE, call_netdata_cleanup, NULL); + cleanup_thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_JOINABLE, call_netdata_cleanup, &controlCode); // Signal the stop request netdata_service_log("Signalling the cleanup thread..."); @@ -176,7 +190,7 @@ void WINAPI ServiceMain(DWORD argc, LPSTR* argv) // Set status to running netdata_service_log("Setting service status to running..."); - if (!ReportSvcStatus(SERVICE_RUNNING, 0, 5000, SERVICE_ACCEPT_STOP)) + if (!ReportSvcStatus(SERVICE_RUNNING, 0, 5000, SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN)) { netdata_service_log("Failed to set service status to running."); return; diff --git a/src/database/contexts/rrdcontext.c b/src/database/contexts/rrdcontext.c index b2253aff6c0ef9..840beb26c1b85a 100644 --- a/src/database/contexts/rrdcontext.c +++ b/src/database/contexts/rrdcontext.c @@ -93,7 +93,7 @@ ALWAYS_INLINE void rrdcontext_collected_rrdset(RRDSET *st) { } ALWAYS_INLINE void rrdcontext_host_child_disconnected(RRDHOST *host) { - 
rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, false); + rrdhost_flag_set(host, RRDHOST_FLAG_RRDCONTEXT_GET_RETENTION); } ALWAYS_INLINE void rrdcontext_host_child_connected(RRDHOST *host) { diff --git a/src/database/contexts/worker.c b/src/database/contexts/worker.c index 63b48c6204e93f..2ad083af498ed3 100644 --- a/src/database/contexts/worker.c +++ b/src/database/contexts/worker.c @@ -1202,6 +1202,11 @@ void *rrdcontext_main(void *ptr) { if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD)) continue; + if(rrdhost_flag_check(host, RRDHOST_FLAG_RRDCONTEXT_GET_RETENTION)) { + rrdcontext_recalculate_host_retention(host, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, false); + rrdhost_flag_clear(host, RRDHOST_FLAG_RRDCONTEXT_GET_RETENTION); + } + worker_is_busy(WORKER_JOB_HOSTS); if(host->rrdctx.pp_queue) { diff --git a/src/database/engine/cache.c b/src/database/engine/cache.c index a471ef655191cb..61c0e44acde671 100644 --- a/src/database/engine/cache.c +++ b/src/database/engine/cache.c @@ -401,7 +401,7 @@ static ssize_t cache_usage_per1000(PGC *cache, int64_t *size_to_evict) { if(cache->config.out_of_memory_protection_bytes) { // out of memory protection OS_SYSTEM_MEMORY sm = os_system_memory(false); - if(sm.ram_total_bytes) { + if(OS_SYSTEM_MEMORY_OK(sm)) { // when the total exists, ram_available_bytes is also right const int64_t ram_available_bytes = (int64_t)sm.ram_available_bytes; @@ -1042,11 +1042,13 @@ static inline void remove_and_free_page_not_in_any_queue_and_acquired_for_deleti static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache, PGC_PAGE *page) { pointer_check(cache, page); + WAITQ_PRIORITY prio = is_page_clean(page) ? 
PGC_QUEUE_LOCK_PRIO_EVICTORS : PGC_QUEUE_LOCK_PRIO_COLLECTORS; + page_transition_lock(cache, page); - pgc_queue_lock(cache, &cache->clean, PGC_QUEUE_LOCK_PRIO_EVICTORS); + pgc_queue_lock(cache, &cache->clean, prio); // make it clean - it does not have any accesses, so it will be prepended - page_set_clean(cache, page, true, true, PGC_QUEUE_LOCK_PRIO_EVICTORS); + page_set_clean(cache, page, true, true, prio); if(!acquired_page_get_for_deletion_or_release_it(cache, page)) { pgc_queue_unlock(cache, &cache->clean); @@ -1055,7 +1057,7 @@ static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache } // remove it from the linked list - pgc_queue_del(cache, &cache->clean, page, true, PGC_QUEUE_LOCK_PRIO_EVICTORS); + pgc_queue_del(cache, &cache->clean, page, true, prio); pgc_queue_unlock(cache, &cache->clean); page_transition_unlock(cache, page); @@ -2006,7 +2008,7 @@ PGC *pgc_create(const char *name, cache->config.out_of_memory_protection_bytes = (int64_t)dbengine_out_of_memory_protection; // partitions - if(partitions == 0) partitions = netdata_conf_cpus(); + if(partitions == 0) partitions = netdata_conf_cpus() * 2; if(partitions <= 4) partitions = 4; if(partitions > 256) partitions = 256; cache->config.partitions = partitions; @@ -2288,7 +2290,7 @@ bool pgc_flush_pages(PGC *cache) { } void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time_t end_time_s, size_t additional_bytes) { - internal_fatal(!is_page_hot(page) && !netdata_exit, + internal_fatal(!is_page_hot(page) && !exit_initiated, "DBENGINE CACHE: end_time_s update on non-hot page"); internal_fatal(end_time_s < __atomic_load_n(&page->end_time_s, __ATOMIC_RELAXED), diff --git a/src/database/engine/datafile.c b/src/database/engine/datafile.c index 91490b99d3775a..af779c9d8f9262 100644 --- a/src/database/engine/datafile.c +++ b/src/database/engine/datafile.c @@ -260,7 +260,7 @@ int create_data_file(struct rrdengine_datafile *datafile) datafile->file = file; 
__atomic_add_fetch(&ctx->stats.datafile_creations, 1, __ATOMIC_RELAXED); - ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + ret = posix_memalignz((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); if (unlikely(ret)) { fatal("DBENGINE: posix_memalign:%s", strerror(ret)); } @@ -278,7 +278,7 @@ int create_data_file(struct rrdengine_datafile *datafile) ctx_io_error(ctx); } uv_fs_req_cleanup(&req); - posix_memfree(superblock); + posix_memalign_freez(superblock); if (ret < 0) { destroy_data_file_unsafe(datafile); return ret; @@ -297,7 +297,7 @@ static int check_data_file_superblock(uv_file file) uv_buf_t iov; uv_fs_t req; - ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + ret = posix_memalignz((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); if (unlikely(ret)) { fatal("DBENGINE: posix_memalign:%s", strerror(ret)); } @@ -321,7 +321,7 @@ static int check_data_file_superblock(uv_file file) ret = 0; } error: - posix_memfree(superblock); + posix_memalign_freez(superblock); return ret; } diff --git a/src/database/engine/journalfile.c b/src/database/engine/journalfile.c index 42c9776aea48a6..348b98f09daf1c 100644 --- a/src/database/engine/journalfile.c +++ b/src/database/engine/journalfile.c @@ -580,7 +580,7 @@ int journalfile_create(struct rrdengine_journalfile *journalfile, struct rrdengi journalfile->file = file; __atomic_add_fetch(&ctx->stats.journalfile_creations, 1, __ATOMIC_RELAXED); - ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + ret = posix_memalignz((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); if (unlikely(ret)) { fatal("DBENGINE: posix_memalign:%s", strerror(ret)); } @@ -597,7 +597,7 @@ int journalfile_create(struct rrdengine_journalfile *journalfile, struct rrdengi ctx_io_error(ctx); } uv_fs_req_cleanup(&req); - posix_memfree(superblock); + posix_memalign_freez(superblock); if (ret < 0) { 
journalfile_destroy_unsafe(journalfile, datafile); return ret; @@ -617,7 +617,7 @@ static int journalfile_check_superblock(uv_file file) uv_buf_t iov; uv_fs_t req; - ret = posix_memalign((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); + ret = posix_memalignz((void *)&superblock, RRDFILE_ALIGNMENT, sizeof(*superblock)); if (unlikely(ret)) { fatal("DBENGINE: posix_memalign:%s", strerror(ret)); } @@ -643,7 +643,7 @@ static int journalfile_check_superblock(uv_file file) ret = 0; } error: - posix_memfree(superblock); + posix_memalign_freez(superblock); return ret; } @@ -813,7 +813,7 @@ static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx, file_size = journalfile->unsafe.pos; max_id = 1; - ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); + ret = posix_memalignz((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); if (unlikely(ret)) fatal("DBENGINE: posix_memalign:%s", strerror(ret)); @@ -844,7 +844,7 @@ static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx, } } skip_file: - posix_memfree(buf); + posix_memalign_freez(buf); return max_id; } diff --git a/src/database/engine/page.c b/src/database/engine/page.c index 1e8694439ac1d7..281130a3ffe3dc 100644 --- a/src/database/engine/page.c +++ b/src/database/engine/page.c @@ -899,8 +899,12 @@ size_t pgd_append_point( if (pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) pgd_fatal(pg, "Data collection on page already scheduled for flushing"); - if (!(pg->states & PGD_STATE_CREATED_FROM_COLLECTOR)) - pgd_fatal(pg, "DBENGINE: collection on page not created from a collector"); + if (!(pg->states & PGD_STATE_CREATED_FROM_COLLECTOR)) { + if(exit_initiated == EXIT_REASON_NONE) + pgd_fatal(pg, "DBENGINE: collection on page not created from a collector"); + else + return 0; + } if (unlikely(pg->used != expected_slot)) pgd_fatal(pg, "DBENGINE: page is not aligned to expected slot (used %u, expected %u)", diff --git a/src/database/engine/pdc.c 
b/src/database/engine/pdc.c index 74ec62a3bb2df7..f1edd70242c41f 100644 --- a/src/database/engine/pdc.c +++ b/src/database/engine/pdc.c @@ -474,7 +474,7 @@ static ALWAYS_INLINE EPDL_EXTENT *epdl_find_extent_base(EPDL *epdl) { e = callocz(1, sizeof(*e)); rw_spinlock_write_lock(&epdl->datafile->extent_epdl.spinlock); - Pvoid_t *PValue = JudyLIns(&epdl->datafile->extent_epdl.epdl_per_extent, epdl->extent_offset, PJE0); + PValue = JudyLIns(&epdl->datafile->extent_epdl.epdl_per_extent, epdl->extent_offset, PJE0); internal_fatal(!PValue || PValue == PJERR, "DBENGINE: corrupted pending extent judy"); if(!*PValue) { *PValue = e; @@ -1216,7 +1216,7 @@ static inline void *datafile_extent_read(struct rrdengine_instance *ctx, uv_file uv_fs_t request; unsigned real_io_size = ALIGN_BYTES_CEILING(size_bytes); - int ret = posix_memalign(&buffer, RRDFILE_ALIGNMENT, real_io_size); + int ret = posix_memalignz(&buffer, RRDFILE_ALIGNMENT, real_io_size); if (unlikely(ret)) fatal("DBENGINE: posix_memalign(): %s", strerror(ret)); @@ -1224,7 +1224,7 @@ static inline void *datafile_extent_read(struct rrdengine_instance *ctx, uv_file ret = uv_fs_read(NULL, &request, file, &iov, 1, (int64_t)pos, NULL); if (unlikely(-1 == ret)) { ctx_io_error(ctx); - posix_memfree(buffer); + posix_memalign_freez(buffer); buffer = NULL; } else @@ -1236,7 +1236,7 @@ static inline void *datafile_extent_read(struct rrdengine_instance *ctx, uv_file } static inline void datafile_extent_read_free(void *buffer) { - posix_memfree(buffer); + posix_memalign_freez(buffer); } NOT_INLINE_HOT void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *epdl, bool worker) { diff --git a/src/database/engine/rrdengine.c b/src/database/engine/rrdengine.c index 4281f38c8424e2..c7119faf626164 100644 --- a/src/database/engine/rrdengine.c +++ b/src/database/engine/rrdengine.c @@ -367,7 +367,7 @@ static void wal_cleanup1(void) { spinlock_unlock(&wal_globals.protected.spinlock); if(wal) { - posix_memfree(wal->buf); + 
posix_memalign_freez(wal->buf); freez(wal); __atomic_sub_fetch(&wal_globals.atomics.allocated, 1, __ATOMIC_RELAXED); } @@ -393,7 +393,7 @@ WAL *wal_get(struct rrdengine_instance *ctx, unsigned size) { if(unlikely(!wal)) { wal = mallocz(sizeof(WAL)); wal->buf_size = RRDENG_BLOCK_SIZE; - int ret = posix_memalign((void *)&wal->buf, RRDFILE_ALIGNMENT, wal->buf_size); + int ret = posix_memalignz((void *)&wal->buf, RRDFILE_ALIGNMENT, wal->buf_size); if (unlikely(ret)) fatal("DBENGINE: posix_memalign:%s", strerror(ret)); __atomic_add_fetch(&wal_globals.atomics.allocated, 1, __ATOMIC_RELAXED); @@ -649,7 +649,7 @@ extent_flush_to_open(struct rrdengine_instance *ctx, struct extent_io_descriptor page_descriptor_release(descr); } - posix_memfree(xt_io_descr->buf); + posix_memalign_freez(xt_io_descr->buf); extent_io_descriptor_release(xt_io_descr); spinlock_lock(&datafile->writers.spinlock); @@ -763,7 +763,7 @@ datafile_extent_build(struct rrdengine_instance *ctx, struct page_descr_with_dat payload_offset = sizeof(*header) + count * sizeof(header->descr[0]); max_compressed_size = dbengine_max_compressed_size(uncompressed_payload_length, compression_algorithm); size_bytes = payload_offset + MAX(uncompressed_payload_length, max_compressed_size) + sizeof(*trailer); - ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes)); + ret = posix_memalignz((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes)); if (unlikely(ret)) { fatal("DBENGINE: posix_memalign:%s", strerror(ret)); /* freez(xt_io_descr);*/ diff --git a/src/database/rrddim-collection.c b/src/database/rrddim-collection.c index bf9cb308a6719a..bb57f3725912c6 100644 --- a/src/database/rrddim-collection.c +++ b/src/database/rrddim-collection.c @@ -11,37 +11,71 @@ static inline time_t tier_next_point_time_s(RRDDIM *rd, struct rrddim_tier *t, t return now_s + loop - ((now_s + loop) % loop); } -ALWAYS_INLINE_HOT void store_metric_at_tier(RRDDIM *rd, size_t tier, 
struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut __maybe_unused) { +#define LAST_COMPLETED_POINT_EXISTS(t) (t->last_completed_point.end_time_s != 0) + +ALWAYS_INLINE_HOT +void store_metric_at_tier_flush_last_completed(RRDDIM *rd __maybe_unused, size_t tier, struct rrddim_tier *t) { + // when there is no end_time_s we do not have a saved last_completed_point + if(!LAST_COMPLETED_POINT_EXISTS(t)) return; + + STORAGE_POINT *sp = &t->last_completed_point; + if(likely(!storage_point_is_unset(t->last_completed_point))) { + storage_engine_store_metric( + t->sch, + sp->end_time_s * USEC_PER_SEC, + sp->sum, + sp->min, + sp->max, + sp->count, + sp->anomaly_count, + sp->flags); + } + else { + storage_engine_store_metric( + t->sch, + sp->end_time_s * USEC_PER_SEC, + NAN, + NAN, + NAN, + 0, + 0, SN_FLAG_NONE); + } + + rrdset_done_statistics_points_stored_per_tier[tier]++; + + // make the point unset + t->last_completed_point.count = 0; // make it unset + t->last_completed_point.end_time_s = 0; // make it not saved +} + +ALWAYS_INLINE_HOT +void store_metric_at_tier_save_last_completed(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp) { + // make sure the last_completed_point is empty + store_metric_at_tier_flush_last_completed(rd, tier, t); + + // copy the point + t->last_completed_point = sp; + + // set the end_time_s, so that we will know we have saved a last_completed_point + t->last_completed_point.end_time_s = t->next_point_end_time_s; +} + +ALWAYS_INLINE_HOT +void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut __maybe_unused) { + if(LAST_COMPLETED_POINT_EXISTS(t) && sp.start_time_s % t->last_completed_point_flush_modulo == 0) + store_metric_at_tier_flush_last_completed(rd, tier, t); + if (unlikely(!t->next_point_end_time_s)) t->next_point_end_time_s = tier_next_point_time_s(rd, t, sp.end_time_s); if(unlikely(sp.start_time_s >= t->next_point_end_time_s)) { // flush the virtual point, it is done - if 
(likely(!storage_point_is_unset(t->virtual_point))) { - - storage_engine_store_metric( - t->sch, - t->next_point_end_time_s * USEC_PER_SEC, - t->virtual_point.sum, - t->virtual_point.min, - t->virtual_point.max, - t->virtual_point.count, - t->virtual_point.anomaly_count, - t->virtual_point.flags); - } - else { - storage_engine_store_metric( - t->sch, - t->next_point_end_time_s * USEC_PER_SEC, - NAN, - NAN, - NAN, - 0, - 0, SN_FLAG_NONE); - } + if (likely(!storage_point_is_unset(t->virtual_point))) + store_metric_at_tier_save_last_completed(rd, tier, t, t->virtual_point); + else + store_metric_at_tier_save_last_completed(rd, tier, t, STORAGE_POINT_UNSET); - rrdset_done_statistics_points_stored_per_tier[tier]++; t->virtual_point.count = 0; // make the point unset t->next_point_end_time_s = tier_next_point_time_s(rd, t, sp.end_time_s); } @@ -73,6 +107,7 @@ ALWAYS_INLINE_HOT void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrdd } } +NOT_INLINE_HOT #ifdef NETDATA_LOG_COLLECTION_ERRORS void rrddim_store_metric_with_trace(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags, const char *function) { #else // !NETDATA_LOG_COLLECTION_ERRORS diff --git a/src/database/rrddim-collection.h b/src/database/rrddim-collection.h index 8549488ec942a7..56bc42d1bc6750 100644 --- a/src/database/rrddim-collection.h +++ b/src/database/rrddim-collection.h @@ -15,4 +15,6 @@ void rrddim_store_metric_with_trace(RRDDIM *rd, usec_t point_end_time_ut, NETDAT void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, SN_FLAGS flags); #endif +void store_metric_at_tier_flush_last_completed(RRDDIM *rd, size_t tier, struct rrddim_tier *t); + #endif //NETDATA_RRDDIM_COLLECTION_H diff --git a/src/database/rrddim.c b/src/database/rrddim.c index 8886f27c6052a0..c3511377c2a2a2 100644 --- a/src/database/rrddim.c +++ b/src/database/rrddim.c @@ -2,6 +2,7 @@ #include "rrd.h" #include "storage-engine.h" +#include "rrddim-collection.h" void 
rrddim_metadata_updated(RRDDIM *rd) { rrdcontext_updated_rrddim(rd); @@ -115,8 +116,13 @@ static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v size_t initialized = 0; for (size_t tier = 0; tier < nd_profile.storage_tiers; tier++) { if (rd->tiers[tier].smh) { + uint32_t tier_update_every = st->rrdhost->db[tier].tier_grouping * st->update_every; + + rd->tiers[tier].last_completed_point_flush_modulo = rrddim_collection_modulo(st, tier_update_every); + rd->tiers[tier].sch = - storage_metric_store_init(rd->tiers[tier].seb, rd->tiers[tier].smh, st->rrdhost->db[tier].tier_grouping * st->update_every, rd->rrdset->smg[tier]); + storage_metric_store_init(rd->tiers[tier].seb, rd->tiers[tier].smh, tier_update_every, rd->rrdset->smg[tier]); + initialized++; } } @@ -175,12 +181,15 @@ bool rrddim_finalize_collection_and_check_retention(RRDDIM *rd) { size_t tiers_available = 0, tiers_said_no_retention = 0; - for(size_t tier = 0; tier < nd_profile.storage_tiers;tier++) { + for(size_t tier = 0; tier < nd_profile.storage_tiers ;tier++) { spinlock_lock(&rd->tiers[tier].spinlock); if(rd->tiers[tier].sch) { tiers_available++; + if(tier > 0) + store_metric_at_tier_flush_last_completed(rd, tier, &rd->tiers[tier]); + if (storage_engine_store_finalize(rd->tiers[tier].sch)) tiers_said_no_retention++; diff --git a/src/database/rrdhost.h b/src/database/rrdhost.h index d92695f31f4436..084f4df7bccb0d 100644 --- a/src/database/rrdhost.h +++ b/src/database/rrdhost.h @@ -84,6 +84,7 @@ typedef enum __attribute__ ((__packed__)) rrdhost_flags { RRDHOST_FLAG_METADATA_CLAIMID = (1 << 27), // metadata needs to be stored in the database RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED = (1 << 28), // set when the host has updated global functions + RRDHOST_FLAG_RRDCONTEXT_GET_RETENTION = (1 << 29), // set when rrdcontext needs to update the retention of the host } RRDHOST_FLAGS; #define rrdhost_flag_get(host) atomic_flags_get(&((host)->flags)) diff --git a/src/database/rrdset-index-id.c 
b/src/database/rrdset-index-id.c index b5a963519eb985..a4587b0fcebc99 100644 --- a/src/database/rrdset-index-id.c +++ b/src/database/rrdset-index-id.c @@ -4,6 +4,22 @@ #include "rrdset-index-name.h" #include "rrdset-slots.h" +// -------------------------------------------------------------------------------------------------------------------- +// tier1/2 spread over time + +static size_t global_rrdset_counter = 0; +static uint16_t rrdset_collection_modulo_init(void) { + return __atomic_fetch_add(&global_rrdset_counter, 1, __ATOMIC_RELAXED) % 65535; +} + +uint16_t rrddim_collection_modulo(RRDSET *st, uint32_t spread) { + if(!spread) spread = 65535; + spread = MIN(spread, 65535); + return 1 + (st->collection_modulo % spread); +} + +// -------------------------------------------------------------------------------------------------------------------- + static inline void rrdset_update_permanent_labels(RRDSET *st) { if(!st->rrdlabels) return; @@ -11,7 +27,7 @@ static inline void rrdset_update_permanent_labels(RRDSET *st) { rrdlabels_add(st->rrdlabels, "_collect_module", rrdset_module_name(st), RRDLABEL_SRC_AUTO | RRDLABEL_FLAG_DONT_DELETE); } -// ---------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- // RRDSET index struct rrdset_constructor { @@ -56,6 +72,8 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v st->name = rrdset_fix_name(host, chart_full_id, ctr->type, NULL, ctr->id); rrdset_index_add_name(host, st); + st->collection_modulo = rrdset_collection_modulo_init(); + st->parts.id = string_strdupz(ctr->id); st->parts.type = string_strdupz(ctr->type); st->parts.name = string_strdupz(ctr->name); diff --git a/src/database/rrdset-index-id.h b/src/database/rrdset-index-id.h index 0a95d07f041fb4..e8ad1e762e247d 100644 --- a/src/database/rrdset-index-id.h +++ b/src/database/rrdset-index-id.h @@ 
-73,6 +73,8 @@ RRDSET_ACQUIRED *rrdset_find_and_acquire(RRDHOST *host, const char *id); void rrdset_acquired_release(RRDSET_ACQUIRED *rsa); RRDSET *rrdset_acquired_to_rrdset(RRDSET_ACQUIRED *rsa); +uint16_t rrddim_collection_modulo(RRDSET *st, uint32_t spread); + #define rrdset_find_localhost(id) rrdset_find(localhost, id) /* This will not return charts that are archived */ static inline RRDSET *rrdset_find_active_localhost(const char *id) { diff --git a/src/database/rrdset.h b/src/database/rrdset.h index 9c8d70c9083128..eb87f77ee0ec33 100644 --- a/src/database/rrdset.h +++ b/src/database/rrdset.h @@ -112,7 +112,8 @@ struct rrdset { // operational state members RRDSET_FLAGS flags; // flags - RRD_DB_MODE rrd_memory_mode; // the db mode of this rrdset + RRD_DB_MODE rrd_memory_mode; // the db mode of this rrdset + uint16_t collection_modulo; // tier1/2 spread over time DICTIONARY *rrddim_root_index; // dimensions index diff --git a/src/database/storage-engine.h b/src/database/storage-engine.h index ee7221bb1f6f1f..914fad583099ac 100644 --- a/src/database/storage-engine.h +++ b/src/database/storage-engine.h @@ -90,11 +90,13 @@ STORAGE_ENGINE* storage_engine_foreach_next(STORAGE_ENGINE* it); struct rrddim_tier { STORAGE_POINT virtual_point; - STORAGE_ENGINE_BACKEND seb; + STORAGE_POINT last_completed_point; // tier1/2 spread over time SPINLOCK spinlock; + STORAGE_ENGINE_BACKEND seb; + uint16_t last_completed_point_flush_modulo; // tier1/2 spread over time uint32_t tier_grouping; time_t next_point_end_time_s; - STORAGE_METRIC_HANDLE *smh; // the metric handle inside the database + STORAGE_METRIC_HANDLE *smh; // the metric handle inside the database STORAGE_COLLECT_HANDLE *sch; // the data collection handle }; diff --git a/src/health/health_dyncfg.c b/src/health/health_dyncfg.c index afc2e9b3bbc8d9..6758f75dfed8ef 100644 --- a/src/health/health_dyncfg.c +++ b/src/health/health_dyncfg.c @@ -95,7 +95,7 @@ static bool parse_config_value_database_lookup(json_object *jobj, 
const char *pa } static bool parse_config_value(json_object *jobj, const char *path, struct rrd_alert_config *config, BUFFER *error, bool strict) { - JSONC_PARSE_SUBOBJECT(jobj, path, "database_lookup", config, parse_config_value_database_lookup, error, strict); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "database_lookup", config, parse_config_value_database_lookup, error, strict); JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(jobj, path, "calculation", config->calculation, error, false); JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "units", config->units, error, false); JSONC_PARSE_INT64_OR_ERROR_AND_RETURN(jobj, path, "update_every", config->update_every, error, strict); @@ -127,8 +127,8 @@ static bool parse_config_action(json_object *jobj, const char *path, struct rrd_ JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, "options", alert_action_options_parse_one, config->alert_action_options, error, strict); JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "execute", config->exec, error, strict); JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "recipient", config->recipient, error, strict); - JSONC_PARSE_SUBOBJECT(jobj, path, "delay", config, parse_config_action_delay, error, strict); - JSONC_PARSE_SUBOBJECT(jobj, path, "repeat", config, parse_config_action_repeat, error, strict); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "delay", config, parse_config_action_delay, error, strict); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "repeat", config, parse_config_action_repeat, error, strict); return true; } @@ -143,10 +143,10 @@ static bool parse_config(json_object *jobj, const char *path, RRD_ALERT_PROTOTYP JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "component", ap->config.component, error, false); JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, "classification", ap->config.classification, error, false); - JSONC_PARSE_SUBOBJECT(jobj, path, "value", &ap->config, parse_config_value, error, strict); - JSONC_PARSE_SUBOBJECT(jobj, 
path, "conditions", &ap->config, parse_config_conditions, error, false); - JSONC_PARSE_SUBOBJECT(jobj, path, "action", &ap->config, parse_config_action, error, false); - JSONC_PARSE_SUBOBJECT(jobj, path, "match", &ap->match, parse_match, error, strict); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "value", &ap->config, parse_config_value, error, strict); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "conditions", &ap->config, parse_config_conditions, error, false); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "action", &ap->config, parse_config_action, error, false); + JSONC_PARSE_SUBOBJECT_CB(jobj, path, "match", &ap->match, parse_match, error, strict); return true; } @@ -194,7 +194,7 @@ static bool parse_prototype(json_object *jobj, const char *path, RRD_ALERT_PROTO return false; } - JSONC_PARSE_SUBOBJECT(rule, path, "config", ap, parse_config, error, strict); + JSONC_PARSE_SUBOBJECT_CB(rule, path, "config", ap, parse_config, error, strict); ap = NULL; // so that we will create another one, if available } diff --git a/src/health/notifications/alarm-notify.sh.in b/src/health/notifications/alarm-notify.sh.in index 1170ddf5eed7a2..b2ec1c5fd341b7 100755 --- a/src/health/notifications/alarm-notify.sh.in +++ b/src/health/notifications/alarm-notify.sh.in @@ -138,7 +138,7 @@ log() { [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return - systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <rrdhost; time_t now = now_realtime_sec(); diff --git a/src/libnetdata/aral/aral.c b/src/libnetdata/aral/aral.c index 796b8b7f92706d..3903de89645f2c 100644 --- a/src/libnetdata/aral/aral.c +++ b/src/libnetdata/aral/aral.c @@ -86,7 +86,7 @@ struct aral_ops { struct { PAD64(size_t) allocators; // the number of threads currently trying to allocate memory PAD64(size_t) deallocators; // the number of threads currently trying to deallocate memory - PAD64(bool) last_allocated_or_deallocated; // stability detector, true when was last allocated + PAD64(bool) last_allocated_page; // 
stability detector, true when was last allocated } atomic; struct { @@ -97,6 +97,16 @@ struct aral_ops { }; struct aral { + struct { + SPINLOCK spinlock; + size_t file_number; // for mmap + + ARAL_PAGE *pages_free; // pages with free items + ARAL_PAGE *pages_full; // pages that are completely full + + ARAL_PAGE *pages_marked_free; // pages with marked items and free slots + ARAL_PAGE *pages_marked_full; // pages with marked items completely full + } aral_lock; struct { char name[ARAL_MAX_NAME + 1]; @@ -120,22 +130,8 @@ struct aral { } config; struct { - SPINLOCK spinlock; - size_t file_number; // for mmap - - ARAL_PAGE *pages_free; // pages with free items - ARAL_PAGE *pages_full; // pages that are completely full - - ARAL_PAGE *pages_marked_free; // pages with marked items and free slots - ARAL_PAGE *pages_marked_full; // pages with marked items completely full - - size_t defragment_operations; - size_t defragment_linked_list_traversals; - } aral_lock; - - struct { - size_t user_malloc_operations; - size_t user_free_operations; + PAD64(size_t) user_malloc_operations; + PAD64(size_t) user_free_operations; } atomic; struct aral_ops ops[2]; @@ -497,8 +493,10 @@ static ALWAYS_INLINE size_t aral_next_allocation_size___adders_lock_needed(ARAL size_t idx = mark_to_idx(marked); size_t size = ar->ops[idx].adders.allocation_size; - bool last_allocated = __atomic_load_n(&ar->ops[idx].atomic.last_allocated_or_deallocated, __ATOMIC_RELAXED); - if(last_allocated) { + bool last_allocated_page = __atomic_load_n(&ar->ops[idx].atomic.last_allocated_page, __ATOMIC_RELAXED); + if(last_allocated_page) { + // we are growing, double the size + size *= 2; if(size > ar->config.max_allocation_size) size = ar->config.max_allocation_size; @@ -512,7 +510,7 @@ static ALWAYS_INLINE size_t aral_next_allocation_size___adders_lock_needed(ARAL memory_alignment(sizeof(ARAL_PAGE), SYSTEM_REQUIRED_ALIGNMENT); } - __atomic_store_n(&ar->ops[idx].atomic.last_allocated_or_deallocated, true, 
__ATOMIC_RELAXED); + __atomic_store_n(&ar->ops[idx].atomic.last_allocated_page, true, __ATOMIC_RELAXED); return size; } @@ -615,7 +613,7 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ static void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { size_t idx = mark_to_idx(page->started_marked); - __atomic_store_n(&ar->ops[idx].atomic.last_allocated_or_deallocated, true, __ATOMIC_RELAXED); + __atomic_store_n(&ar->ops[idx].atomic.last_allocated_page, false, __ATOMIC_RELAXED); struct aral_page_type_stats *stats; size_t max_elements = page->max_elements; diff --git a/src/libnetdata/buffer/buffer.c b/src/libnetdata/buffer/buffer.c index 9e239638f6f4ed..997ba585e00842 100644 --- a/src/libnetdata/buffer/buffer.c +++ b/src/libnetdata/buffer/buffer.c @@ -224,6 +224,11 @@ BUFFER *buffer_create(size_t size, size_t *statistics) { BUFFER *b; + if(!size) + size = 1024 - sizeof(BUFFER_OVERFLOW_EOF) - 2; + else + size++; // make room for the terminator + netdata_log_debug(D_WEB_BUFFER, "Creating new web buffer of size %zu.", size); b = callocz(1, sizeof(BUFFER)); @@ -263,10 +268,10 @@ void buffer_increase(BUFFER *b, size_t free_size_required) { if(remaining >= free_size_required) return; size_t increase = free_size_required - remaining; - size_t minimum = 128; + size_t minimum = 1024; if(minimum > increase) increase = minimum; - size_t optimal = (b->size > 5*1024*1024) ? b->size / 2 : b->size; + size_t optimal = (b->size > 5 * 1024 * 1024) ? 
b->size / 2 : b->size; if(optimal > increase) increase = optimal; netdata_log_debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", (size_t)b->size, (size_t)(b->size + increase)); diff --git a/src/libnetdata/buffer/buffer.h b/src/libnetdata/buffer/buffer.h index 21afac0777a427..17b602bea1fc59 100644 --- a/src/libnetdata/buffer/buffer.h +++ b/src/libnetdata/buffer/buffer.h @@ -84,7 +84,8 @@ static inline void _buffer_overflow_check(BUFFER *b __maybe_unused) { "BUFFER: detected overflow."); } -static ALWAYS_INLINE void buffer_flush(BUFFER *wb) { +ALWAYS_INLINE +static void buffer_flush(BUFFER *wb) { wb->len = 0; wb->json.depth = 0; @@ -117,7 +118,8 @@ void buffer_char_replace(BUFFER *wb, char from, char to); void buffer_print_sn_flags(BUFFER *wb, SN_FLAGS flags, bool send_anomaly_bit); -static ALWAYS_INLINE void buffer_need_bytes(BUFFER *buffer, size_t needed_free_size) { +ALWAYS_INLINE +static void buffer_need_bytes(BUFFER *buffer, size_t needed_free_size) { if(unlikely(buffer->len + needed_free_size >= buffer->size)) buffer_increase(buffer, needed_free_size + 1); } @@ -127,7 +129,8 @@ void buffer_json_initialize(BUFFER *wb, const char *key_quote, const char *value void buffer_json_finalize(BUFFER *wb); -static ALWAYS_INLINE const char *buffer_tostring(BUFFER *wb) +ALWAYS_INLINE +static const char *buffer_tostring(BUFFER *wb) { if(unlikely(!wb)) return NULL; @@ -140,7 +143,8 @@ static ALWAYS_INLINE const char *buffer_tostring(BUFFER *wb) return(wb->buffer); } -static ALWAYS_INLINE void _buffer_json_depth_push(BUFFER *wb, BUFFER_JSON_NODE_TYPE type) { +ALWAYS_INLINE +static void _buffer_json_depth_push(BUFFER *wb, BUFFER_JSON_NODE_TYPE type) { #ifdef NETDATA_INTERNAL_CHECKS assert(wb->json.depth <= BUFFER_JSON_MAX_DEPTH && "BUFFER JSON: max nesting reached"); #endif @@ -152,18 +156,21 @@ static ALWAYS_INLINE void _buffer_json_depth_push(BUFFER *wb, BUFFER_JSON_NODE_T wb->json.stack[wb->json.depth].type = type; } -static ALWAYS_INLINE void 
_buffer_json_depth_pop(BUFFER *wb) { +ALWAYS_INLINE +static void _buffer_json_depth_pop(BUFFER *wb) { wb->json.depth--; } -static ALWAYS_INLINE void buffer_putc(BUFFER *wb, char c) { +ALWAYS_INLINE +static void buffer_putc(BUFFER *wb, char c) { buffer_need_bytes(wb, 2); wb->buffer[wb->len++] = c; wb->buffer[wb->len] = '\0'; buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_fast_rawcat(BUFFER *wb, const char *txt, size_t len) { +ALWAYS_INLINE +static void buffer_fast_rawcat(BUFFER *wb, const char *txt, size_t len) { if(unlikely(!txt || !*txt || !len)) return; buffer_need_bytes(wb, len + 1); @@ -182,7 +189,8 @@ static ALWAYS_INLINE void buffer_fast_rawcat(BUFFER *wb, const char *txt, size_t buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len) { +ALWAYS_INLINE +static void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len) { if(unlikely(!txt || !*txt || !len)) return; buffer_need_bytes(wb, len + 1); @@ -209,7 +217,8 @@ static ALWAYS_INLINE void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_strcat(BUFFER *wb, const char *txt) { +ALWAYS_INLINE +static void buffer_strcat(BUFFER *wb, const char *txt) { if(unlikely(!txt || !*txt)) return; const char *t = txt; @@ -231,7 +240,8 @@ static ALWAYS_INLINE void buffer_strcat(BUFFER *wb, const char *txt) { buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_contents_replace(BUFFER *wb, const char *txt, size_t len) { +ALWAYS_INLINE +static void buffer_contents_replace(BUFFER *wb, const char *txt, size_t len) { wb->len = 0; buffer_need_bytes(wb, len + 1); @@ -242,7 +252,8 @@ static ALWAYS_INLINE void buffer_contents_replace(BUFFER *wb, const char *txt, s buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_strncat(BUFFER *wb, const char *txt, size_t len) { +ALWAYS_INLINE +static void buffer_strncat(BUFFER *wb, const char *txt, size_t len) { if(unlikely(!txt 
|| !*txt)) return; buffer_need_bytes(wb, len + 1); @@ -255,7 +266,8 @@ static ALWAYS_INLINE void buffer_strncat(BUFFER *wb, const char *txt, size_t len buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_memcat(BUFFER *wb, const void *mem, size_t bytes) { +ALWAYS_INLINE +static void buffer_memcat(BUFFER *wb, const void *mem, size_t bytes) { if(unlikely(!mem)) return; buffer_need_bytes(wb, bytes + 1); @@ -268,7 +280,8 @@ static ALWAYS_INLINE void buffer_memcat(BUFFER *wb, const void *mem, size_t byte buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_json_strcat(BUFFER *wb, const char *txt) +ALWAYS_INLINE +static void buffer_json_strcat(BUFFER *wb, const char *txt) { if(unlikely(!txt || !*txt)) return; @@ -310,11 +323,20 @@ static ALWAYS_INLINE void buffer_json_strcat(BUFFER *wb, const char *txt) if(unlikely(*t < ' ')) { uint32_t v = *t++; *d++ = '\\'; - *d++ = 'u'; - *d++ = hex_digits[(v >> 12) & 0xf]; - *d++ = hex_digits[(v >> 8) & 0xf]; - *d++ = hex_digits[(v >> 4) & 0xf]; - *d++ = hex_digits[v & 0xf]; + switch (v) { + case '\n': *d++ = 'n'; break; + case '\r': *d++ = 'r'; break; + case '\t': *d++ = 't'; break; + case '\b': *d++ = 'b'; break; + case '\f': *d++ = 'f'; break; + default: + *d++ = 'u'; + *d++ = hex_digits[(v >> 12) & 0xf]; + *d++ = hex_digits[(v >> 8) & 0xf]; + *d++ = hex_digits[(v >> 4) & 0xf]; + *d++ = hex_digits[v & 0xf]; + break; + } } else { if (unlikely(*t == '\\' || *t == '\"')) @@ -333,7 +355,8 @@ static ALWAYS_INLINE void buffer_json_strcat(BUFFER *wb, const char *txt) buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_json_quoted_strcat(BUFFER *wb, const char *txt) { +ALWAYS_INLINE +static void buffer_json_quoted_strcat(BUFFER *wb, const char *txt) { if(unlikely(!txt || !*txt)) return; if(*txt == '"') @@ -372,13 +395,15 @@ static ALWAYS_INLINE void buffer_json_quoted_strcat(BUFFER *wb, const char *txt) // point the remaining value fits in 32 bits, and then calls // print_number_lu_r() to print the rest 
with 32 bit arithmetic. -static ALWAYS_INLINE char *print_uint32_reversed(char *dst, uint32_t value) { +ALWAYS_INLINE +static char *print_uint32_reversed(char *dst, uint32_t value) { char *d = dst; do *d++ = (char)('0' + (value % 10)); while((value /= 10)); return d; } -static ALWAYS_INLINE char *print_uint64_reversed(char *dst, uint64_t value) { +ALWAYS_INLINE +static char *print_uint64_reversed(char *dst, uint64_t value) { #ifdef ENV32BIT if(value <= (uint64_t)0xffffffff) return print_uint32_reversed(dst, value); @@ -394,14 +419,16 @@ static ALWAYS_INLINE char *print_uint64_reversed(char *dst, uint64_t value) { #endif } -static ALWAYS_INLINE char *print_uint32_hex_reversed(char *dst, uint32_t value) { +ALWAYS_INLINE +static char *print_uint32_hex_reversed(char *dst, uint32_t value) { static const char *digits = "0123456789ABCDEF"; char *d = dst; do *d++ = digits[value & 0xf]; while((value >>= 4)); return d; } -static ALWAYS_INLINE char *print_uint64_hex_reversed(char *dst, uint64_t value) { +ALWAYS_INLINE +static char *print_uint64_hex_reversed(char *dst, uint64_t value) { #ifdef ENV32BIT if(value <= (uint64_t)0xffffffff) return print_uint32_hex_reversed(dst, value); @@ -417,7 +444,8 @@ static ALWAYS_INLINE char *print_uint64_hex_reversed(char *dst, uint64_t value) #endif } -static ALWAYS_INLINE char *print_uint64_hex_reversed_full(char *dst, uint64_t value) { +ALWAYS_INLINE +static char *print_uint64_hex_reversed_full(char *dst, uint64_t value) { char *d = dst; for(size_t c = 0; c < sizeof(uint64_t) * 2; c++) { *d++ = hex_digits[value & 0xf]; @@ -427,19 +455,22 @@ static ALWAYS_INLINE char *print_uint64_hex_reversed_full(char *dst, uint64_t va return d; } -static ALWAYS_INLINE char *print_uint64_base64_reversed(char *dst, uint64_t value) { +ALWAYS_INLINE +static char *print_uint64_base64_reversed(char *dst, uint64_t value) { char *d = dst; do *d++ = base64_digits[value & 63]; while ((value >>= 6)); return d; } -static ALWAYS_INLINE void char_array_reverse(char 
*from, char *to) { +ALWAYS_INLINE +static void char_array_reverse(char *from, char *to) { // from and to are inclusive char *begin = from, *end = to, aux; while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; } -static ALWAYS_INLINE int print_netdata_double(char *dst, NETDATA_DOUBLE value) { +ALWAYS_INLINE +static int print_netdata_double(char *dst, NETDATA_DOUBLE value) { char *s = dst; if(unlikely(value < 0)) { @@ -502,7 +533,8 @@ static ALWAYS_INLINE int print_netdata_double(char *dst, NETDATA_DOUBLE value) { return (int)(d - dst); } -static ALWAYS_INLINE size_t print_uint64(char *dst, uint64_t value) { +ALWAYS_INLINE +static size_t print_uint64(char *dst, uint64_t value) { char *s = dst; char *d = print_uint64_reversed(s, value); char_array_reverse(s, d - 1); @@ -510,7 +542,8 @@ static ALWAYS_INLINE size_t print_uint64(char *dst, uint64_t value) { return d - s; } -static ALWAYS_INLINE size_t print_int64(char *dst, int64_t value) { +ALWAYS_INLINE +static size_t print_int64(char *dst, int64_t value) { size_t len = 0; if(value < 0) { @@ -523,20 +556,23 @@ static ALWAYS_INLINE size_t print_int64(char *dst, int64_t value) { } #define UINT64_MAX_LENGTH (24) // 21 should be enough -static ALWAYS_INLINE void buffer_print_uint64(BUFFER *wb, uint64_t value) { +ALWAYS_INLINE +static void buffer_print_uint64(BUFFER *wb, uint64_t value) { buffer_need_bytes(wb, UINT64_MAX_LENGTH); wb->len += print_uint64(&wb->buffer[wb->len], value); buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_print_int64(BUFFER *wb, int64_t value) { +ALWAYS_INLINE +static void buffer_print_int64(BUFFER *wb, int64_t value) { buffer_need_bytes(wb, UINT64_MAX_LENGTH); wb->len += print_int64(&wb->buffer[wb->len], value); buffer_overflow_check(wb); } #define UINT64_HEX_MAX_LENGTH ((sizeof(HEX_PREFIX) - 1) + (sizeof(uint64_t) * 2) + 1) -static ALWAYS_INLINE size_t print_uint64_hex(char *dst, uint64_t value) { +ALWAYS_INLINE +static size_t print_uint64_hex(char *dst, uint64_t value) { 
char *d = dst; const char *s = HEX_PREFIX; @@ -548,7 +584,8 @@ static ALWAYS_INLINE size_t print_uint64_hex(char *dst, uint64_t value) { return e - dst; } -static ALWAYS_INLINE size_t print_uint64_hex_full(char *dst, uint64_t value) { +ALWAYS_INLINE +static size_t print_uint64_hex_full(char *dst, uint64_t value) { char *d = dst; const char *s = HEX_PREFIX; @@ -560,20 +597,23 @@ static ALWAYS_INLINE size_t print_uint64_hex_full(char *dst, uint64_t value) { return e - dst; } -static ALWAYS_INLINE void buffer_print_uint64_hex(BUFFER *wb, uint64_t value) { +ALWAYS_INLINE +static void buffer_print_uint64_hex(BUFFER *wb, uint64_t value) { buffer_need_bytes(wb, UINT64_HEX_MAX_LENGTH); wb->len += print_uint64_hex(&wb->buffer[wb->len], value); buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_print_uint64_hex_full(BUFFER *wb, uint64_t value) { +ALWAYS_INLINE +static void buffer_print_uint64_hex_full(BUFFER *wb, uint64_t value) { buffer_need_bytes(wb, UINT64_HEX_MAX_LENGTH); wb->len += print_uint64_hex_full(&wb->buffer[wb->len], value); buffer_overflow_check(wb); } #define UINT64_B64_MAX_LENGTH ((sizeof(IEEE754_UINT64_B64_PREFIX) - 1) + (sizeof(uint64_t) * 2) + 1) -static ALWAYS_INLINE void buffer_print_uint64_base64(BUFFER *wb, uint64_t value) { +ALWAYS_INLINE +static void buffer_print_uint64_base64(BUFFER *wb, uint64_t value) { buffer_need_bytes(wb, UINT64_B64_MAX_LENGTH); buffer_fast_strcat(wb, IEEE754_UINT64_B64_PREFIX, sizeof(IEEE754_UINT64_B64_PREFIX) - 1); @@ -587,11 +627,12 @@ static ALWAYS_INLINE void buffer_print_uint64_base64(BUFFER *wb, uint64_t value) buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_print_int64_hex(BUFFER *wb, int64_t value) { +ALWAYS_INLINE +static void buffer_print_int64_hex(BUFFER *wb, int64_t value) { buffer_need_bytes(wb, 2); if(value < 0) { - buffer_fast_strcat(wb, "-", 1); + buffer_putc(wb, '-'); value = -value; } @@ -600,11 +641,12 @@ static ALWAYS_INLINE void buffer_print_int64_hex(BUFFER *wb, int64_t value) { 
buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_print_int64_base64(BUFFER *wb, int64_t value) { +ALWAYS_INLINE +static void buffer_print_int64_base64(BUFFER *wb, int64_t value) { buffer_need_bytes(wb, 2); if(value < 0) { - buffer_fast_strcat(wb, "-", 1); + buffer_putc(wb, '-'); value = -value; } @@ -614,7 +656,9 @@ static ALWAYS_INLINE void buffer_print_int64_base64(BUFFER *wb, int64_t value) { } #define DOUBLE_MAX_LENGTH (512) // 318 should be enough, including null -static ALWAYS_INLINE void buffer_print_netdata_double(BUFFER *wb, NETDATA_DOUBLE value) { + +ALWAYS_INLINE +static void buffer_print_netdata_double(BUFFER *wb, NETDATA_DOUBLE value) { buffer_need_bytes(wb, DOUBLE_MAX_LENGTH); if(isnan(value) || isinf(value)) { @@ -632,7 +676,8 @@ static ALWAYS_INLINE void buffer_print_netdata_double(BUFFER *wb, NETDATA_DOUBLE } #define DOUBLE_HEX_MAX_LENGTH ((sizeof(IEEE754_DOUBLE_HEX_PREFIX) - 1) + (sizeof(uint64_t) * 2) + 1) -static ALWAYS_INLINE void buffer_print_netdata_double_hex(BUFFER *wb, NETDATA_DOUBLE value) { +ALWAYS_INLINE +static void buffer_print_netdata_double_hex(BUFFER *wb, NETDATA_DOUBLE value) { buffer_need_bytes(wb, DOUBLE_HEX_MAX_LENGTH); uint64_t *ptr = (uint64_t *) (&value); @@ -648,7 +693,8 @@ static ALWAYS_INLINE void buffer_print_netdata_double_hex(BUFFER *wb, NETDATA_DO } #define DOUBLE_B64_MAX_LENGTH ((sizeof(IEEE754_DOUBLE_B64_PREFIX) - 1) + (sizeof(uint64_t) * 2) + 1) -static ALWAYS_INLINE void buffer_print_netdata_double_base64(BUFFER *wb, NETDATA_DOUBLE value) { +ALWAYS_INLINE +static void buffer_print_netdata_double_base64(BUFFER *wb, NETDATA_DOUBLE value) { buffer_need_bytes(wb, DOUBLE_B64_MAX_LENGTH); uint64_t *ptr = (uint64_t *) (&value); @@ -669,7 +715,8 @@ typedef enum { NUMBER_ENCODING_BASE64, } NUMBER_ENCODING; -static ALWAYS_INLINE void buffer_print_int64_encoded(BUFFER *wb, NUMBER_ENCODING encoding, int64_t value) { +ALWAYS_INLINE +static void buffer_print_int64_encoded(BUFFER *wb, NUMBER_ENCODING encoding, 
int64_t value) { if(encoding == NUMBER_ENCODING_BASE64) return buffer_print_int64_base64(wb, value); @@ -679,7 +726,8 @@ static ALWAYS_INLINE void buffer_print_int64_encoded(BUFFER *wb, NUMBER_ENCODING return buffer_print_int64(wb, value); } -static ALWAYS_INLINE void buffer_print_uint64_encoded(BUFFER *wb, NUMBER_ENCODING encoding, uint64_t value) { +ALWAYS_INLINE +static void buffer_print_uint64_encoded(BUFFER *wb, NUMBER_ENCODING encoding, uint64_t value) { if(encoding == NUMBER_ENCODING_BASE64) return buffer_print_uint64_base64(wb, value); @@ -689,7 +737,8 @@ static ALWAYS_INLINE void buffer_print_uint64_encoded(BUFFER *wb, NUMBER_ENCODIN return buffer_print_uint64(wb, value); } -static ALWAYS_INLINE void buffer_print_netdata_double_encoded(BUFFER *wb, NUMBER_ENCODING encoding, NETDATA_DOUBLE value) { +ALWAYS_INLINE +static void buffer_print_netdata_double_encoded(BUFFER *wb, NUMBER_ENCODING encoding, NETDATA_DOUBLE value) { if(encoding == NUMBER_ENCODING_BASE64) return buffer_print_netdata_double_base64(wb, value); @@ -699,7 +748,8 @@ static ALWAYS_INLINE void buffer_print_netdata_double_encoded(BUFFER *wb, NUMBER return buffer_print_netdata_double(wb, value); } -static ALWAYS_INLINE void buffer_print_spaces(BUFFER *wb, size_t spaces) { +ALWAYS_INLINE +static void buffer_print_spaces(BUFFER *wb, size_t spaces) { buffer_need_bytes(wb, spaces * 4 + 1); char *d = &wb->buffer[wb->len]; @@ -716,29 +766,33 @@ static ALWAYS_INLINE void buffer_print_spaces(BUFFER *wb, size_t spaces) { buffer_overflow_check(wb); } -static ALWAYS_INLINE void buffer_print_json_comma(BUFFER *wb) { +ALWAYS_INLINE +static void buffer_print_json_comma(BUFFER *wb) { if(wb->json.stack[wb->json.depth].count) - buffer_fast_strcat(wb, ",", 1); + buffer_putc(wb, ','); } -static ALWAYS_INLINE void buffer_print_json_comma_newline_spacing(BUFFER *wb) { +ALWAYS_INLINE +static void buffer_print_json_comma_newline_spacing(BUFFER *wb) { buffer_print_json_comma(wb); if((wb->json.options & 
BUFFER_JSON_OPTIONS_MINIFY) || (wb->json.stack[wb->json.depth].type == BUFFER_JSON_ARRAY && !(wb->json.options & BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS))) return; - buffer_fast_strcat(wb, "\n", 1); + buffer_putc(wb, '\n'); buffer_print_spaces(wb, wb->json.depth + 1); } -static ALWAYS_INLINE void buffer_print_json_key(BUFFER *wb, const char *key) { +ALWAYS_INLINE +static void buffer_print_json_key(BUFFER *wb, const char *key) { buffer_strcat(wb, wb->json.key_quote); buffer_json_strcat(wb, key); buffer_strcat(wb, wb->json.key_quote); } -static ALWAYS_INLINE void buffer_json_add_string_value(BUFFER *wb, const char *value) { +ALWAYS_INLINE +static void buffer_json_add_string_value(BUFFER *wb, const char *value) { if(value) { buffer_strcat(wb, wb->json.value_quote); buffer_json_strcat(wb, value); @@ -748,7 +802,8 @@ static ALWAYS_INLINE void buffer_json_add_string_value(BUFFER *wb, const char *v buffer_fast_strcat(wb, "null", 4); } -static ALWAYS_INLINE void buffer_json_add_quoted_string_value(BUFFER *wb, const char *value) { +ALWAYS_INLINE +static void buffer_json_add_quoted_string_value(BUFFER *wb, const char *value) { if(value) { buffer_strcat(wb, wb->json.value_quote); buffer_json_quoted_strcat(wb, value); @@ -773,17 +828,17 @@ static inline void buffer_json_object_close(BUFFER *wb) { assert(wb->json.stack[wb->json.depth].type == BUFFER_JSON_OBJECT && "BUFFER JSON: an object is not open to close it"); #endif if(!(wb->json.options & BUFFER_JSON_OPTIONS_MINIFY)) { - buffer_fast_strcat(wb, "\n", 1); + buffer_putc(wb, '\n'); buffer_print_spaces(wb, wb->json.depth); } - buffer_fast_strcat(wb, "}", 1); + buffer_putc(wb, '}'); _buffer_json_depth_pop(wb); } static inline void buffer_json_member_add_string(BUFFER *wb, const char *key, const char *value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_json_add_string_value(wb, value); wb->json.stack[wb->json.depth].count++; @@ 
-807,7 +862,7 @@ void buffer_json_member_add_duration_ut(BUFFER *wb, const char *key, int64_t dur static inline void buffer_json_member_add_quoted_string(BUFFER *wb, const char *key, const char *value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); if(!value || strcmp(value, "null") == 0) buffer_fast_strcat(wb, "null", 4); @@ -820,7 +875,7 @@ static inline void buffer_json_member_add_quoted_string(BUFFER *wb, const char * static inline void buffer_json_member_add_uuid_ptr(BUFFER *wb, const char *key, nd_uuid_t *value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); if(value && !uuid_is_null(*value)) { char uuid[GUID_LEN + 1]; @@ -836,10 +891,10 @@ static inline void buffer_json_member_add_uuid_ptr(BUFFER *wb, const char *key, static inline void buffer_json_member_add_uuid(BUFFER *wb, const char *key, nd_uuid_t value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); if(!uuid_is_null(value)) { - char uuid[GUID_LEN + 1]; + char uuid[UUID_STR_LEN]; uuid_unparse_lower(value, uuid); buffer_json_add_string_value(wb, uuid); } @@ -849,10 +904,26 @@ static inline void buffer_json_member_add_uuid(BUFFER *wb, const char *key, nd_u wb->json.stack[wb->json.depth].count++; } +static inline void buffer_json_member_add_uuid_compact(BUFFER *wb, const char *key, nd_uuid_t value) { + buffer_print_json_comma_newline_spacing(wb); + buffer_print_json_key(wb, key); + buffer_putc(wb, ':'); + + if(!uuid_is_null(value)) { + char uuid[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(value, uuid); + buffer_json_add_string_value(wb, uuid); + } + else + buffer_json_add_string_value(wb, NULL); + + wb->json.stack[wb->json.depth].count++; +} + static inline void buffer_json_member_add_boolean(BUFFER *wb, const char *key, bool value) 
{ buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_strcat(wb, value?"true":"false"); wb->json.stack[wb->json.depth].count++; @@ -865,7 +936,7 @@ static inline void buffer_json_member_add_array(BUFFER *wb, const char *key) { buffer_fast_strcat(wb, ":[", 2); } else - buffer_fast_strcat(wb, "[", 1); + buffer_putc(wb, '['); wb->json.stack[wb->json.depth].count++; @@ -876,13 +947,13 @@ static inline void buffer_json_add_array_item_array(BUFFER *wb) { if(!(wb->json.options & BUFFER_JSON_OPTIONS_MINIFY) && wb->json.stack[wb->json.depth].type == BUFFER_JSON_ARRAY) { // an array inside another array buffer_print_json_comma(wb); - buffer_fast_strcat(wb, "\n", 1); + buffer_putc(wb, '\n'); buffer_print_spaces(wb, wb->json.depth + 1); } else buffer_print_json_comma_newline_spacing(wb); - buffer_fast_strcat(wb, "[", 1); + buffer_putc(wb, '['); wb->json.stack[wb->json.depth].count++; _buffer_json_depth_push(wb, BUFFER_JSON_ARRAY); @@ -969,7 +1040,7 @@ static inline void buffer_json_add_array_item_time_t2ms(BUFFER *wb, time_t value static inline void buffer_json_add_array_item_object(BUFFER *wb) { buffer_print_json_comma_newline_spacing(wb); - buffer_fast_strcat(wb, "{", 1); + buffer_putc(wb, '{'); wb->json.stack[wb->json.depth].count++; _buffer_json_depth_push(wb, BUFFER_JSON_OBJECT); @@ -978,7 +1049,7 @@ static inline void buffer_json_add_array_item_object(BUFFER *wb) { static inline void buffer_json_member_add_time_t(BUFFER *wb, const char *key, time_t value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_print_int64(wb, value); wb->json.stack[wb->json.depth].count++; @@ -987,7 +1058,7 @@ static inline void buffer_json_member_add_time_t(BUFFER *wb, const char *key, ti static inline void buffer_json_member_add_time_t2ms(BUFFER *wb, const char *key, time_t value) { 
buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_print_int64(wb, value); buffer_fast_strcat(wb, "000", 3); @@ -997,7 +1068,7 @@ static inline void buffer_json_member_add_time_t2ms(BUFFER *wb, const char *key, static inline void buffer_json_member_add_uint64(BUFFER *wb, const char *key, uint64_t value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_print_uint64(wb, value); wb->json.stack[wb->json.depth].count++; @@ -1006,7 +1077,7 @@ static inline void buffer_json_member_add_uint64(BUFFER *wb, const char *key, ui static inline void buffer_json_member_add_int64(BUFFER *wb, const char *key, int64_t value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_print_int64(wb, value); wb->json.stack[wb->json.depth].count++; @@ -1015,7 +1086,7 @@ static inline void buffer_json_member_add_int64(BUFFER *wb, const char *key, int static inline void buffer_json_member_add_double(BUFFER *wb, const char *key, NETDATA_DOUBLE value) { buffer_print_json_comma_newline_spacing(wb); buffer_print_json_key(wb, key); - buffer_fast_strcat(wb, ":", 1); + buffer_putc(wb, ':'); buffer_print_netdata_double(wb, value); wb->json.stack[wb->json.depth].count++; @@ -1027,11 +1098,11 @@ static inline void buffer_json_array_close(BUFFER *wb) { assert(wb->json.stack[wb->json.depth].type == BUFFER_JSON_ARRAY && "BUFFER JSON: an array is not open to close it"); #endif if(wb->json.options & BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS) { - buffer_fast_strcat(wb, "\n", 1); + buffer_putc(wb, '\n'); buffer_print_spaces(wb, wb->json.depth); } - buffer_fast_strcat(wb, "]", 1); + buffer_putc(wb, ']'); _buffer_json_depth_pop(wb); } diff --git a/src/libnetdata/common.h b/src/libnetdata/common.h index 7e1763a99620c3..38e1302baa961a 100644 
--- a/src/libnetdata/common.h +++ b/src/libnetdata/common.h @@ -426,6 +426,11 @@ typedef uint32_t uid_t; // -------------------------------------------------------------------------------------------------------------------- +#define FUNCTION_RUN_ONCE() { static bool __run_once = false; if(__run_once) return; __run_once = true; } +#define FUNCTION_RUN_ONCE_RET(ret) { static bool __run_once = false; if(__run_once) return (ret); __run_once = true; } + +// -------------------------------------------------------------------------------------------------------------------- + #ifndef HOST_NAME_MAX #define HOST_NAME_MAX 256 #endif diff --git a/src/libnetdata/datetime/rfc3339.c b/src/libnetdata/datetime/rfc3339.c index 5c4e990ddb6a2c..ab2743e5a2c25c 100644 --- a/src/libnetdata/datetime/rfc3339.c +++ b/src/libnetdata/datetime/rfc3339.c @@ -66,7 +66,7 @@ usec_t rfc3339_parse_ut(const char *rfc3339, char **endptr) { char *s; usec_t timestamp, usec = 0; - // Use strptime to parse up to seconds + // Parse date and time (up to seconds) s = strptime(rfc3339, "%Y-%m-%dT%H:%M:%S", &tm); if (!s) return 0; // Parsing error @@ -78,29 +78,33 @@ usec_t rfc3339_parse_ut(const char *rfc3339, char **endptr) { int digits_parsed = (int)(next - (s + 1)); if (digits_parsed < 1 || digits_parsed > 9) - return 0; // parsing error + return 0; // Parsing error static const usec_t fix_usec[] = { - 1000000, // 0 digits (not used) - 100000, // 1 digit - 10000, // 2 digits - 1000, // 3 digits - 100, // 4 digits - 10, // 5 digits - 1, // 6 digits - 10, // 7 digits - 100, // 8 digits - 1000, // 9 digits + 1000000, // 0 digits (not used) + 100000, // 1 digit + 10000, // 2 digits + 1000, // 3 digits + 100, // 4 digits + 10, // 5 digits + 1, // 6 digits + 10, // 7 digits + 100, // 8 digits + 1000 // 9 digits }; - usec = digits_parsed <= 6 ? 
usec * fix_usec[digits_parsed] : usec / fix_usec[digits_parsed]; + + if (digits_parsed <= 6) + usec = usec * fix_usec[digits_parsed]; + else + usec = usec / fix_usec[digits_parsed]; s = next; } - // Check and parse timezone if present + // Parse timezone specification int tz_offset = 0; if (*s == '+' || *s == '-') { - // Parse the hours:mins part of the timezone + // Ensure format is correct: e.g. +02:00 or -05:30 if (!isdigit((uint8_t)s[1]) || !isdigit((uint8_t)s[2]) || s[3] != ':' || !isdigit((uint8_t)s[4]) || !isdigit((uint8_t)s[5])) @@ -108,8 +112,7 @@ usec_t rfc3339_parse_ut(const char *rfc3339, char **endptr) { char tz_sign = *s; tz_hours = (s[1] - '0') * 10 + (s[2] - '0'); - tz_mins = (s[4] - '0') * 10 + (s[5] - '0'); - + tz_mins = (s[4] - '0') * 10 + (s[5] - '0'); tz_offset = tz_hours * 3600 + tz_mins * 60; tz_offset *= (tz_sign == '+' ? 1 : -1); @@ -118,17 +121,53 @@ usec_t rfc3339_parse_ut(const char *rfc3339, char **endptr) { else if (*s == 'Z') s++; else - return 0; // Invalid RFC 3339 format + return 0; // Invalid RFC 3339 timezone specification - // Convert to time_t (assuming local time, then adjusting for timezone later) - time_t epoch_s = mktime(&tm); + // Convert struct tm to time_t in UTC + time_t epoch_s; + +#if defined(HAVE_TIMEGM) + // If available, use timegm() which interprets tm as UTC. + epoch_s = timegm(&tm); +#else + // Use mktime(), which assumes tm is local time, then adjust. + epoch_s = mktime(&tm); if (epoch_s == -1) return 0; // Error in time conversion +# if defined(HAVE_TM_GMTOFF) + // tm.tm_gmtoff is seconds east of UTC; mktime() returned (tm-as-UTC - tm_gmtoff), so add it back. + epoch_s += tm.tm_gmtoff; +# else + // Fallback: compute the difference between localtime and gmtime.
+ { + struct tm local_tm, utc_tm; +#if defined(_POSIX_THREAD_SAFE_FUNCTIONS) && !defined(__APPLE__) + localtime_r(&epoch_s, &local_tm); + gmtime_r(&epoch_s, &utc_tm); +#else + // If thread-safe functions are not available, use localtime() and gmtime() + struct tm *lt = localtime(&epoch_s); + struct tm *gt = gmtime(&epoch_s); + if (!lt || !gt) + return 0; + local_tm = *lt; + utc_tm = *gt; +#endif + int local_offset = (local_tm.tm_hour - utc_tm.tm_hour) * 3600 + + (local_tm.tm_min - utc_tm.tm_min) * 60; + int day_diff = (local_tm.tm_year == utc_tm.tm_year) ? (local_tm.tm_yday - utc_tm.tm_yday) : ((local_tm.tm_year > utc_tm.tm_year) ? 1 : -1); // tm_yday wraps at new year, so compare years first + local_offset += day_diff * 86400; + epoch_s += local_offset; // local_offset is how far local time is ahead of UTC; mktime() subtracted it + } +# endif +#endif + + // Combine seconds with fractional microseconds, then adjust for the RFC 3339 timezone. timestamp = (usec_t)epoch_s * USEC_PER_SEC + usec; timestamp -= tz_offset * USEC_PER_SEC; - if(endptr) + if (endptr) *endptr = s; return timestamp; diff --git a/src/libnetdata/exit/exit_initiated.c b/src/libnetdata/exit/exit_initiated.c new file mode 100644 index 00000000000000..f82d407c395c7a --- /dev/null +++ b/src/libnetdata/exit/exit_initiated.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +volatile EXIT_REASON exit_initiated = EXIT_REASON_NONE; + +ENUM_STR_MAP_DEFINE(EXIT_REASON) = { + { EXIT_REASON_SIGINT, "signal-interrupt"}, + { EXIT_REASON_SIGQUIT, "signal-quit"}, + { EXIT_REASON_SIGTERM, "signal-terminate"}, + { EXIT_REASON_SIGBUS, "signal-bus-error"}, + { EXIT_REASON_SIGSEGV, "signal-segmentation-fault"}, + { EXIT_REASON_SIGFPE, "signal-floating-point-exception"}, + { EXIT_REASON_SIGILL, "signal-illegal-instruction"}, + { EXIT_REASON_API_QUIT, "api-quit"}, + { EXIT_REASON_CMD_EXIT, "cmd-exit"}, + { EXIT_REASON_FATAL, "fatal"}, + { EXIT_REASON_SYSTEM_SHUTDOWN, "system-shutdown"}, + { EXIT_REASON_SERVICE_STOP, "service-stop"}, + { EXIT_REASON_UPDATE, "update"}, + + // terminator + {0, NULL}, +}; + +BITMAP_STR_DEFINE_FUNCTIONS(EXIT_REASON, EXIT_REASON_NONE, "none"); + +#if
defined(OS_LINUX)
+static bool is_system_shutdown_sysv(void) { // true when any of the marker files below exists
+    const char *shutdown_files[] = {
+        "/etc/nologin",     // Created during shutdown
+        "/etc/halt",        // SysV shutdown indicator
+        "/run/nologin",     // Modern systems shutdown indicator
+        NULL
+    };
+
+    for (const char **file = shutdown_files; *file != NULL; file++) {
+        if (access(*file, F_OK) == 0)
+            return true;
+    }
+
+    return false;
+}
+
+static bool is_system_shutdown(void) {
+    return is_system_shutdown_sysv();
+}
+#endif
+
+#if defined(OS_FREEBSD)
+#include <sys/sysctl.h>
+static bool is_system_shutdown(void) { // reads the integer sysctl kern.shutdown; non-zero means shutting down
+    int state = 0;
+    size_t state_len = sizeof(state);
+
+    if (sysctlbyname("kern.shutdown", &state, &state_len, NULL, 0) == 0)
+        return state != 0;
+
+    return false;
+}
+#endif
+
+#if defined(OS_MACOS)
+#include <sys/sysctl.h>
+static bool is_system_shutdown(void) {
+    char buf[1024];
+    size_t len = sizeof(buf);
+
+    if (sysctlbyname("kern.shutdownstate", buf, &len, NULL, 0) == 0)
+        return true; // NOTE(review): any successful read counts as shutdown, buf is never inspected - confirm intended
+
+    if (access("/var/db/.SystemShutdown", F_OK) == 0)
+        return true;
+
+    return false;
+}
+#endif
+
+#if defined(OS_WINDOWS)
+#include <windows.h>
+static bool is_system_shutdown(void) {
+    return GetSystemMetrics(SM_SHUTTINGDOWN) != 0;
+}
+#endif
+
+static const char *self_path = NULL;     // path returned by os_get_process_path() at the last reset
+static OS_FILE_METADATA self = { 0 };    // metadata of self_path captured at the last reset
+
+void exit_initiated_reset(void) { // clears the recorded reason and re-snapshots the file at self_path
+    exit_initiated = EXIT_REASON_NONE;
+
+    freez((char *)self_path);
+    self_path = os_get_process_path();
+    if(self_path)
+        self = os_get_file_metadata(self_path);
+}
+
+void exit_initiated_set(EXIT_REASON reason) {
+    if(exit_initiated == EXIT_REASON_NONE && !(reason & EXIT_REASON_SYSTEM_SHUTDOWN) && is_system_shutdown()) // probe only while no reason is recorded yet
+        reason |= EXIT_REASON_SYSTEM_SHUTDOWN;
+
+    if(exit_initiated == EXIT_REASON_NONE && self_path && OS_FILE_METADATA_OK(self)) {
+        OS_FILE_METADATA self_now = os_get_file_metadata(self_path);
+        if(OS_FILE_METADATA_OK(self_now) && (self_now.modified_time != self.modified_time || self_now.size_bytes != self.size_bytes)) // file changed since the snapshot
+            reason |= EXIT_REASON_UPDATE;
+    }
+
+    // we combine all of them
together
+    // so that if this is called multiple times,
+    // we will have all of them
+    exit_initiated |= reason;
+}
diff --git a/src/libnetdata/exit/exit_initiated.h b/src/libnetdata/exit/exit_initiated.h
new file mode 100644
index 00000000000000..960820ec378cce
--- /dev/null
+++ b/src/libnetdata/exit/exit_initiated.h
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EXIT_INITIATED_H
+#define NETDATA_EXIT_INITIATED_H
+
+#include "../common.h"
+#include "../template-enum.h"
+
+typedef enum { // bit flags: reasons are OR-ed into exit_initiated, so every reason needs its own bit
+    EXIT_REASON_NONE            = 0,
+
+    // automatically detect when exit_initiated_set() is called
+    // supports Linux, FreeBSD, MacOS, Windows
+    EXIT_REASON_SYSTEM_SHUTDOWN = (1 << 0), // detected
+
+    // signals - normal termination
+    EXIT_REASON_SIGQUIT         = (1 << 1), // rare, but graceful
+    EXIT_REASON_SIGTERM         = (1 << 2), // received on Linux, FreeBSD, MacOS
+    EXIT_REASON_SIGINT          = (1 << 3), // received on Windows on normal termination
+
+    // signals - abnormal termination
+    EXIT_REASON_SIGBUS          = (1 << 4),
+    EXIT_REASON_SIGSEGV         = (1 << 5),
+    EXIT_REASON_SIGFPE          = (1 << 6),
+    EXIT_REASON_SIGILL          = (1 << 7),
+
+    // normal termination via APIs (must not share a bit with the abnormal signals above)
+    EXIT_REASON_API_QUIT        = (1 << 8),
+    EXIT_REASON_CMD_EXIT        = (1 << 9),
+
+    // abnormal termination via a fatal message
+    EXIT_REASON_FATAL           = (1 << 10),
+
+    // windows specific, service stop
+    EXIT_REASON_SERVICE_STOP    = (1 << 11),
+
+    // netdata update
+    EXIT_REASON_UPDATE          = (1 << 12),
+} EXIT_REASON;
+
+#define EXIT_REASON_NORMAL (EXIT_REASON_SIGINT|EXIT_REASON_SIGTERM|EXIT_REASON_SIGQUIT|EXIT_REASON_API_QUIT|EXIT_REASON_CMD_EXIT|EXIT_REASON_SERVICE_STOP|EXIT_REASON_SYSTEM_SHUTDOWN|EXIT_REASON_UPDATE)
+#define EXIT_REASON_ABNORMAL (EXIT_REASON_SIGBUS|EXIT_REASON_SIGSEGV|EXIT_REASON_SIGFPE|EXIT_REASON_SIGILL|EXIT_REASON_FATAL)
+
+#define is_exit_reason_normal(reason) (((reason) & EXIT_REASON_NORMAL) && !((reason) & EXIT_REASON_ABNORMAL))
+
+typedef struct web_buffer BUFFER;
+BITMAP_STR_DEFINE_FUNCTIONS_EXTERN(EXIT_REASON);
+
+extern volatile EXIT_REASON exit_initiated; + +void exit_initiated_reset(void); +void exit_initiated_set(EXIT_REASON reason); + +#endif //NETDATA_EXIT_INITIATED_H diff --git a/src/libnetdata/facets/logs_query_status.h b/src/libnetdata/facets/logs_query_status.h index e41f2e82b990f4..566d1ec88dce1e 100644 --- a/src/libnetdata/facets/logs_query_status.h +++ b/src/libnetdata/facets/logs_query_status.h @@ -326,7 +326,8 @@ static inline void lqs_function_help(LOGS_QUERY_STATUS *lqs, BUFFER *wb) { ); } -static inline bool lqs_request_parse_json_payload(json_object *jobj, const char *path, void *data, BUFFER *error) { +static inline bool lqs_request_parse_json_payload(json_object *jobj, void *data, BUFFER *error) { + const char *path = ""; struct logs_query_data *qd = data; LOGS_QUERY_REQUEST *rq = qd->rq; BUFFER *wb = qd->wb; diff --git a/src/libnetdata/json/json-c-parser-inline.c b/src/libnetdata/json/json-c-parser-inline.c index a17847a3ef94a5..2dbb8e05221c8d 100644 --- a/src/libnetdata/json/json-c-parser-inline.c +++ b/src/libnetdata/json/json-c-parser-inline.c @@ -38,7 +38,7 @@ struct json_object *json_parse_function_payload_or_error(BUFFER *output, BUFFER json_tokener_free(tokener); CLEAN_BUFFER *error = buffer_create(0, NULL); - if(!cb(jobj, "", cb_data, error)) { + if(!cb(jobj, cb_data, error)) { char tmp[buffer_strlen(error) + 100]; snprintfz(tmp, sizeof(tmp), "JSON parser failed: %s", buffer_tostring(error)); *code = rrd_call_function_error(output, tmp, HTTP_RESP_BAD_REQUEST); @@ -50,3 +50,37 @@ struct json_object *json_parse_function_payload_or_error(BUFFER *output, BUFFER return jobj; } + +int json_parse_payload_or_error(BUFFER *payload, BUFFER *error, json_parse_function_payload_t cb, void *cb_data) { + if(!payload || !buffer_strlen(payload)) { + buffer_strcat(error, "No payload given, but a payload is required for this feature."); + return HTTP_RESP_BAD_REQUEST; + } + + struct json_tokener *tokener = json_tokener_new(); + if (!tokener) { + 
buffer_strcat(error, "Failed to initialize json parser."); + return HTTP_RESP_INTERNAL_SERVER_ERROR; + } + + struct json_object *jobj = json_tokener_parse_ex(tokener, buffer_tostring(payload), (int)buffer_strlen(payload)); + if (json_tokener_get_error(tokener) != json_tokener_success) { + const char *error_msg = json_tokener_error_desc(json_tokener_get_error(tokener)); + char tmp[strlen(error_msg) + 100]; + snprintf(tmp, sizeof(tmp), "JSON parser failed: %s", error_msg); + json_tokener_free(tokener); + buffer_strcat(error, tmp); + return HTTP_RESP_BAD_REQUEST; + } + json_tokener_free(tokener); + + if(!cb(jobj, cb_data, error)) { + if(!buffer_strlen(error)) + buffer_strcat(error, "Unknown error during parsing"); + json_object_put(jobj); + return HTTP_RESP_BAD_REQUEST; + } + + json_object_put(jobj); + return HTTP_RESP_OK; +} diff --git a/src/libnetdata/json/json-c-parser-inline.h b/src/libnetdata/json/json-c-parser-inline.h index e51cb232e77260..f4f33022559378 100644 --- a/src/libnetdata/json/json-c-parser-inline.h +++ b/src/libnetdata/json/json-c-parser-inline.h @@ -25,6 +25,17 @@ } \ } while(0) +#define JSONC_PARSE_TXT2CHAR_OR_ERROR_AND_RETURN(jobj, path, member, dst, error, required) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ + strncpyz(dst, json_object_get_string(_j), sizeof(dst) - 1); \ + } \ + else if(required) { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' string", path, member); \ + return false; \ + } \ +} while(0) + #define JSONC_PARSE_TXT2STRDUPZ_OR_ERROR_AND_RETURN(jobj, path, member, dst, error, required) do { \ json_object *_j; \ if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ @@ -140,7 +151,7 @@ typeof(dst) _bit = converter(_option_str); \ if (_bit == 0) { \ buffer_sprintf(error, "unknown option '%s' in '%s.%s' at index %zu", _option_str, path, member, _i); \ - return false; \ + /* return false; */ \ 
} \ dst |= _bit; \ } \ @@ -217,7 +228,7 @@ } \ } while(0) -#define JSONC_PARSE_SUBOBJECT(jobj, path, member, dst, callback, error, required) do { \ +#define JSONC_PARSE_SUBOBJECT_CB(jobj, path, member, dst, callback, error, required) do { \ json_object *_j; \ if (json_object_object_get_ex(jobj, member, &_j)) { \ char _new_path[strlen(path) + strlen(member) + 2]; \ @@ -231,8 +242,58 @@ } \ } while(0) -typedef bool (*json_parse_function_payload_t)(json_object *jobj, const char *path, void *data, BUFFER *error); +#define JSONC_TEMP_VAR(type, line) JSONC_TEMP_VAR_IMPL(type, line) +#define JSONC_TEMP_VAR_IMPL(type, line) _jsonc_temp_##type##line + +#define JSONC_PATH_CONCAT(path, sizeof_path, prefix, member, error) do { \ + size_t len = strlen(prefix); \ + if(len >= sizeof_path - 1) { \ + buffer_sprintf(error, "path too long while adding '%s'", member); \ + return false; \ + } \ + if(len) { \ + if(len >= sizeof_path - 2) { \ + buffer_sprintf(error, "path too long while adding '.' before '%s'", member); \ + return false; \ + } \ + strncpyz(path + len, ".", sizeof_path - len - 1); \ + len++; \ + } \ + strncpyz(path + len, member, sizeof_path - len - 1); /* strncpyz() takes size-1: keep room for the NUL */ \ +} while(0) + +#define JSONC_PARSE_SUBOBJECT(jobj, path, member, error, required, block) do { \ + BUILD_BUG_ON(sizeof(path) < 128); /* ensure path is an array of at least 128 bytes */ \ + json_object *JSONC_TEMP_VAR(_j, __LINE__); \ + if (!json_object_object_get_ex(jobj, member, &JSONC_TEMP_VAR(_j, __LINE__))) { \ + if(required) { \ + buffer_sprintf(error, "missing '%s.%s' object", *path ? path : "", member); \ + return false; \ + } \ + } \ + else { \ + if (!json_object_is_type(JSONC_TEMP_VAR(_j, __LINE__), json_type_object)) { \ + buffer_sprintf(error, "not an object '%s.%s'", *path ? 
path : "", member); \ + return false; \ + } \ + json_object *JSONC_TEMP_VAR(saved_jobj, __LINE__) = jobj; \ + jobj = JSONC_TEMP_VAR(_j, __LINE__); \ + char JSONC_TEMP_VAR(saved_path, __LINE__)[strlen(path) + 1]; \ + strncpyz(JSONC_TEMP_VAR(saved_path, __LINE__), path, sizeof(JSONC_TEMP_VAR(saved_path, __LINE__))); \ + JSONC_PATH_CONCAT(path, sizeof(path), path, member, error); \ + /* Run the user's code block */ \ + block \ + /* Restore the previous scope's values */ \ + jobj = JSONC_TEMP_VAR(saved_jobj, __LINE__); \ + strncpyz(path, JSONC_TEMP_VAR(saved_path, __LINE__), sizeof(path)); \ + } \ +} while(0) + +typedef bool (*json_parse_function_payload_t)(json_object *jobj, void *data, BUFFER *error); int rrd_call_function_error(BUFFER *wb, const char *msg, int code); struct json_object *json_parse_function_payload_or_error(BUFFER *output, BUFFER *payload, int *code, json_parse_function_payload_t cb, void *cb_data); +// return HTTP response code +int json_parse_payload_or_error(BUFFER *payload, BUFFER *error, json_parse_function_payload_t cb, void *cb_data); + #endif //NETDATA_JSON_C_PARSER_INLINE_H diff --git a/src/libnetdata/libjudy/judy-malloc.c b/src/libnetdata/libjudy/judy-malloc.c index 8f1f2eb248f0da..576411f61525c7 100644 --- a/src/libnetdata/libjudy/judy-malloc.c +++ b/src/libnetdata/libjudy/judy-malloc.c @@ -2,6 +2,9 @@ #include "judy-malloc.h" +// -------------------------------------------------------------------------------------------------------------------- +// Judy using ARAL + #define MAX_JUDY_SIZE_TO_ARAL 24 static bool judy_sizes_config[MAX_JUDY_SIZE_TO_ARAL + 1] = { [3] = true, @@ -15,11 +18,11 @@ static bool judy_sizes_config[MAX_JUDY_SIZE_TO_ARAL + 1] = { [15] = true, [23] = true, }; -static ARAL *judy_sizes_aral[MAX_JUDY_SIZE_TO_ARAL + 1] = {}; +static ARAL *judy_sizes_aral[MAX_JUDY_SIZE_TO_ARAL + 1] = { 0 }; -struct aral_statistics judy_sizes_aral_statistics = {}; +struct aral_statistics judy_sizes_aral_statistics = { 0 }; 
-__attribute__((constructor)) void aral_judy_init(void) { +static void aral_judy_init(void) { for(size_t Words = 0; Words <= MAX_JUDY_SIZE_TO_ARAL; Words++) if(judy_sizes_config[Words]) { char buf[30+1]; @@ -47,12 +50,15 @@ struct aral_statistics *judy_aral_statistics(void) { } static ARAL *judy_size_aral(Word_t Words) { - if(Words <= MAX_JUDY_SIZE_TO_ARAL && judy_sizes_aral[Words]) + if(Words <= MAX_JUDY_SIZE_TO_ARAL) return judy_sizes_aral[Words]; return NULL; } +// -------------------------------------------------------------------------------------------------------------------- +// Judy memory tracking + static __thread int64_t judy_allocated = 0; ALWAYS_INLINE void JudyAllocThreadPulseReset(void) { @@ -65,14 +71,57 @@ ALWAYS_INLINE int64_t JudyAllocThreadPulseGetAndReset(void) { return rc; } -inline Word_t JudyMalloc(Word_t Words) { +// -------------------------------------------------------------------------------------------------------------------- +// Judy dedicated jemalloc arena + +static unsigned jemalloc_arena_index = 0; +static bool jemalloc_initialized = false; + +#ifdef HAVE_JEMALLOC_ARENA_API +#include <jemalloc/jemalloc.h> +static void jemalloc_init(void) { + // Create shared arena + size_t sz = sizeof(unsigned); + if (mallctl("arenas.create", &jemalloc_arena_index, &sz, NULL, 0) != 0) + return; + + // Disable thread cache for direct arena access + bool cache_enabled = false; + if (mallctl("thread.tcache.enabled", NULL, NULL, &cache_enabled, sizeof(cache_enabled)) != 0) + return; + + jemalloc_initialized = true; +} + +static void *jemalloc_malloc(Word_t Words) { + return mallocx(Words * sizeof(Word_t), MALLOCX_ARENA(jemalloc_arena_index)); +} + +static void jemalloc_free(void * PWord, Word_t Words __maybe_unused) { + if(PWord) + dallocx(PWord, MALLOCX_ARENA(jemalloc_arena_index)); +} +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// Judy API + +inline Word_t JudyMalloc(Word_t Words) +{ Word_t Addr; 
- ARAL *ar = judy_size_aral(Words); - if(ar) - Addr = (Word_t) aral_mallocz(ar); +#ifdef HAVE_JEMALLOC_ARENA_API + if(jemalloc_initialized) + Addr = (Word_t)jemalloc_malloc(Words); else - Addr = (Word_t) mallocz(Words * sizeof(Word_t)); +#endif + { + ARAL *ar = judy_size_aral(Words); + if (ar) + Addr = (Word_t)aral_mallocz(ar); + else + Addr = (Word_t)mallocz(Words * sizeof(Word_t)); + } judy_allocated += Words * sizeof(Word_t); @@ -80,11 +129,18 @@ inline Word_t JudyMalloc(Word_t Words) { } inline void JudyFree(void * PWord, Word_t Words) { - ARAL *ar = judy_size_aral(Words); - if(ar) - aral_freez(ar, PWord); +#ifdef HAVE_JEMALLOC_ARENA_API + if(jemalloc_initialized) + jemalloc_free(PWord, Words); else - freez(PWord); +#endif + { + ARAL *ar = judy_size_aral(Words); + if (ar) + aral_freez(ar, PWord); + else + freez(PWord); + } judy_allocated -= Words * sizeof(Word_t); } @@ -96,3 +152,18 @@ Word_t JudyMallocVirtual(Word_t Words) { void JudyFreeVirtual(void * PWord, Word_t Words) { JudyFree(PWord, Words); } + +// -------------------------------------------------------------------------------------------------------------------- +// initialization + +void libjudy_malloc_init(void) { + // IMPORTANT: this is not called on external plugins + // the allocator should run even if this is not called + +#ifdef HAVE_JEMALLOC_ARENA_API + jemalloc_init(); + if(!jemalloc_initialized) +#endif + aral_judy_init(); +} + diff --git a/src/libnetdata/libjudy/judy-malloc.h b/src/libnetdata/libjudy/judy-malloc.h index 2f6cf69391a625..23975a4da0d912 100644 --- a/src/libnetdata/libjudy/judy-malloc.h +++ b/src/libnetdata/libjudy/judy-malloc.h @@ -12,4 +12,6 @@ struct aral_statistics *judy_aral_statistics(void); void JudyAllocThreadPulseReset(void); int64_t JudyAllocThreadPulseGetAndReset(void); +void libjudy_malloc_init(void); + #endif //NETDATA_JUDY_MALLOC_H diff --git a/src/libnetdata/libnetdata.c b/src/libnetdata/libnetdata.c index 986de3b7a000b0..5f642a6c53669e 100644 --- 
a/src/libnetdata/libnetdata.c +++ b/src/libnetdata/libnetdata.c @@ -8,8 +8,6 @@ struct rlimit rlimit_nofile = { .rlim_cur = 1024, .rlim_max = 1024 }; -volatile sig_atomic_t netdata_exit = 0; - // -------------------------------------------------------------------------------------------------------------------- void json_escape_string(char *dst, const char *src, size_t size) { diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h index 7e362db9258463..dbc3ec55944fac 100644 --- a/src/libnetdata/libnetdata.h +++ b/src/libnetdata/libnetdata.h @@ -11,6 +11,7 @@ extern "C" { #include "memory/alignment.h" #include "memory/nd-mallocz.h" #include "memory/nd-mmap.h" +#include "libnetdata/exit/exit_initiated.h" #include "log/nd_log-fatal.h" #include "atomics/atomics.h" @@ -39,8 +40,6 @@ char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len); int verify_netdata_host_prefix(bool log_msg); -extern volatile sig_atomic_t netdata_exit; - char *read_by_filename(const char *filename, long *file_size); char *find_and_replace(const char *src, const char *find, const char *replace, const char *where); @@ -58,9 +57,9 @@ bool run_command_and_copy_output_to_stdout(const char *command, int max_line_len struct web_buffer *run_command_and_get_output_to_buffer(const char *command, int max_line_length); #ifdef OS_WINDOWS -void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data); +void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data); #else -void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) NORETURN; +void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data) NORETURN; #endif extern const char *netdata_configured_host_prefix; diff --git a/src/libnetdata/log/nd_log-field-formatters.c 
b/src/libnetdata/log/nd_log-field-formatters.c index e1b3c0d08d8a27..ca3abe0139a716 100644 --- a/src/libnetdata/log/nd_log-field-formatters.c +++ b/src/libnetdata/log/nd_log-field-formatters.c @@ -7,7 +7,7 @@ int64_t log_field_to_int64(struct log_field *lf) { // --- FIELD_PARSER_VERSIONS --- // // IMPORTANT: - // THERE ARE 6 VERSIONS OF THIS CODE + // THERE ARE MULTIPLE VERSIONS OF THIS CODE // // 1. journal (direct socket API), // 2. journal (libsystemd API), @@ -69,7 +69,7 @@ uint64_t log_field_to_uint64(struct log_field *lf) { // --- FIELD_PARSER_VERSIONS --- // // IMPORTANT: - // THERE ARE 6 VERSIONS OF THIS CODE + // THERE ARE MULTIPLE VERSIONS OF THIS CODE // // 1. journal (direct socket API), // 2. journal (libsystemd API), @@ -125,3 +125,76 @@ uint64_t log_field_to_uint64(struct log_field *lf) { return 0; } + +char *log_field_strdupz(struct log_field *lf) { + + // --- FIELD_PARSER_VERSIONS --- + // + // IMPORTANT: + // THERE ARE MULTIPLE VERSIONS OF THIS CODE + // + // 1. journal (direct socket API), + // 2. journal (libsystemd API), + // 3. logfmt, + // 4. json, + // 5. convert to uint64 + // 6. 
convert to int64 + // + // UPDATE ALL OF THEM FOR NEW FEATURES OR FIXES + + CLEAN_BUFFER *tmp = NULL; + const char *s = NULL; + char buf[DOUBLE_MAX_LENGTH]; + + switch(lf->entry.type) { + default: + case NDFT_UNSET: + return NULL; + + case NDFT_UUID: + uuid_unparse_lower_compact(*lf->entry.uuid, buf); + s = buf; + break; + + case NDFT_TXT: + s = lf->entry.txt; + break; + + case NDFT_STR: + s = string2str(lf->entry.str); + break; + + case NDFT_BFR: + s = buffer_tostring(lf->entry.bfr); + break; + + case NDFT_CALLBACK: + tmp = buffer_create(0, NULL); + + if(lf->entry.cb.formatter(tmp, lf->entry.cb.formatter_data)) + s = buffer_tostring(tmp); + else + s = NULL; + break; + + case NDFT_U64: + print_uint64(buf, lf->entry.u64); + s = buf; + break; + + case NDFT_I64: + print_int64(buf, lf->entry.i64); + s = buf; + break; + + case NDFT_DBL: + print_netdata_double(buf, lf->entry.dbl); + s = buf; + break; + } + + if(s && *s) + return strdupz(s); + + return NULL; +} diff --git a/src/libnetdata/log/nd_log-init.c b/src/libnetdata/log/nd_log-init.c index c3b9523e15cd90..4802ddffe0b2f1 100644 --- a/src/libnetdata/log/nd_log-init.c +++ b/src/libnetdata/log/nd_log-init.c @@ -19,6 +19,12 @@ __attribute__((constructor)) void initialize_invocation_id(void) { nd_setenv("NETDATA_INVOCATION_ID", uuid, 1); } +ND_UUID nd_log_get_invocation_id(void) { + ND_UUID rc; + uuid_copy(rc.uuid, nd_log.invocation_id); + return rc; +} + // -------------------------------------------------------------------------------------------------------------------- void nd_log_initialize_for_external_plugins(const char *name) { @@ -88,8 +94,8 @@ void nd_log_initialize_for_external_plugins(const char *name) { switch(method) { case NDLM_JOURNAL: - if(!nd_log_journal_direct_init(getenv("NETDATA_SYSTEMD_JOURNAL_PATH")) || - !nd_log_journal_direct_init(NULL) || !nd_log_journal_systemd_init()) { + if(!nd_log_journal_direct_init(getenv("NETDATA_SYSTEMD_JOURNAL_PATH")) && + !nd_log_journal_direct_init(NULL) && 
!nd_log_journal_systemd_init()) { nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to initialize journal. Using stderr."); method = NDLM_STDERR; } @@ -312,4 +318,3 @@ void nd_log_reopen_log_files_for_spawn_server(const char *name) { nd_log_initialize_for_external_plugins(name); } - diff --git a/src/libnetdata/log/nd_log-internals.h b/src/libnetdata/log/nd_log-internals.h index 6105f286add2e6..a9d97ada96da66 100644 --- a/src/libnetdata/log/nd_log-internals.h +++ b/src/libnetdata/log/nd_log-internals.h @@ -125,6 +125,7 @@ struct nd_log { nd_uuid_t invocation_id; ND_LOG_SOURCES overwrite_process_source; + log_event_t log_event_cb; struct nd_log_source sources[_NDLS_MAX]; @@ -148,6 +149,8 @@ struct nd_log { struct { bool etw; // when set use etw, otherwise wel bool initialized; + bool provider_enabled; // track etw provider state + SPINLOCK provider_lock; // Protect etw provider state access } eventlog; struct { @@ -209,6 +212,7 @@ const char *winerror_annotator(struct log_field *lf); uint64_t log_field_to_uint64(struct log_field *lf); int64_t log_field_to_int64(struct log_field *lf); +char *log_field_strdupz(struct log_field *lf); // -------------------------------------------------------------------------------------------------------------------- // common text formatters diff --git a/src/libnetdata/log/nd_log-libunwind.c b/src/libnetdata/log/nd_log-libunwind.c index fab1fb9c1c96d5..d78c4a4dd425fa 100644 --- a/src/libnetdata/log/nd_log-libunwind.c +++ b/src/libnetdata/log/nd_log-libunwind.c @@ -32,11 +32,11 @@ bool stack_trace_formatter(BUFFER *wb, void *data __maybe_unused) { unw_getcontext(&context); unw_init_local(&cursor, &context); - // Skip first 3 frames (our logging infrastructure) - for (int i = 0; i < 3; i++) { - if (unw_step(&cursor) <= 0) - goto cleanup; // Ensure proper cleanup if unwinding fails early - } +// // Skip first 3 frames (our logging infrastructure) +// for (int i = 0; i < 3; i++) { +// if (unw_step(&cursor) <= 0) +// goto cleanup; // Ensure 
proper cleanup if unwinding fails early +// } while (unw_step(&cursor) > 0) { unw_word_t offset, pc; @@ -58,7 +58,6 @@ bool stack_trace_formatter(BUFFER *wb, void *data __maybe_unused) { } } -cleanup: in_stack_trace = false; // Ensure the flag is reset return true; } diff --git a/src/libnetdata/log/nd_log-to-windows-events.c b/src/libnetdata/log/nd_log-to-windows-events.c index 6d1767be6796dc..d1823441c42e59 100644 --- a/src/libnetdata/log/nd_log-to-windows-events.c +++ b/src/libnetdata/log/nd_log-to-windows-events.c @@ -172,9 +172,27 @@ static void etw_set_source_meta(struct nd_log_source *source, USHORT channelID, source->Keyword = ed->Keyword; } +// Callback for provider enable/disable notifications +static void NTAPI ProviderEnableCallback( + LPCGUID SourceId __maybe_unused, + ULONG IsEnabled, + UCHAR Level __maybe_unused, + ULONGLONG MatchAnyKeyword __maybe_unused, + ULONGLONG MatchAllKeyword __maybe_unused, + PEVENT_FILTER_DESCRIPTOR FilterData __maybe_unused, + PVOID CallbackContext __maybe_unused +) { + spinlock_lock(&nd_log.eventlog.provider_lock); + nd_log.eventlog.provider_enabled = IsEnabled ? 
true : false; + spinlock_unlock(&nd_log.eventlog.provider_lock); +} + static bool etw_register_provider(void) { + spinlock_init(&nd_log.eventlog.provider_lock); + nd_log.eventlog.provider_enabled = false; + // Register the ETW provider - if (EventRegister(&NETDATA_ETW_PROVIDER_GUID, NULL, NULL, &regHandle) != ERROR_SUCCESS) + if (EventRegister(&NETDATA_ETW_PROVIDER_GUID, ProviderEnableCallback, NULL, &regHandle) != ERROR_SUCCESS) return false; etw_set_source_meta(&nd_log.sources[NDLS_DAEMON], CHANNEL_DAEMON, &ED_DAEMON_INFO_MESSAGE_ONLY); @@ -185,7 +203,23 @@ static bool etw_register_provider(void) { etw_set_source_meta(&nd_log.sources[NDLS_UNSET], CHANNEL_DAEMON, &ED_DAEMON_INFO_MESSAGE_ONLY); etw_set_source_meta(&nd_log.sources[NDLS_DEBUG], CHANNEL_DAEMON, &ED_DAEMON_INFO_MESSAGE_ONLY); - return true; + DWORD wait_start = GetTickCount(); + while(true) { + spinlock_lock(&nd_log.eventlog.provider_lock); + bool enabled = nd_log.eventlog.provider_enabled; + spinlock_unlock(&nd_log.eventlog.provider_lock); + + if(enabled) + return true; + + // Timeout after 5 seconds + if(GetTickCount() - wait_start > 5000) { + EventUnregister(regHandle); + return false; + } + + Sleep(10); // Short sleep between checks + } } #endif @@ -353,7 +387,7 @@ static const char *get_field_value_unsafe(struct log_field *fields, ND_LOG_FIELD break; case NDFT_UUID: if (!uuid_is_null(*fields[i].entry.uuid)) { - uuid_unparse_lower(*fields[i].entry.uuid, number_str); + uuid_unparse_lower_compact(*fields[i].entry.uuid, number_str); s = number_str; } break; diff --git a/src/libnetdata/log/nd_log.c b/src/libnetdata/log/nd_log.c index 1e284a9317c2d1..ef3a6d2d584bcb 100644 --- a/src/libnetdata/log/nd_log.c +++ b/src/libnetdata/log/nd_log.c @@ -108,12 +108,41 @@ static ND_LOG_METHOD nd_logger_select_output(ND_LOG_SOURCES source, FILE **fpp, return output; } +// -------------------------------------------------------------------------------------------------------------------- + +static __thread bool 
nd_log_event_this = false; + +static void nd_log_event(struct log_field *fields, size_t fields_max __maybe_unused) { + if(!nd_log_event_this) + return; + + nd_log_event_this = false; + + if(!nd_log.log_event_cb) + return; + + const char *filename = log_field_strdupz(&fields[NDF_FILE]); + const char *message = log_field_strdupz(&fields[NDF_MESSAGE]); + const char *function = log_field_strdupz(&fields[NDF_FUNC]); + const char *stack_trace = log_field_strdupz(&fields[NDF_STACK_TRACE]); + long line = log_field_to_int64(&fields[NDF_LINE]); + + nd_log.log_event_cb(filename, function, message, stack_trace, line); +} + +void nd_log_register_event_cb(log_event_t cb) { + nd_log.log_event_cb = cb; +} + // -------------------------------------------------------------------------------------------------------------------- // high level logger static void nd_logger_log_fields(SPINLOCK *spinlock, FILE *fp, bool limit, ND_LOG_FIELD_PRIORITY priority, ND_LOG_METHOD output, struct nd_log_source *source, struct log_field *fields, size_t fields_max) { + + nd_log_event(fields, fields_max); + if(spinlock) spinlock_lock(spinlock); @@ -437,6 +466,9 @@ void netdata_logger_fatal(const char *file, const char *function, const unsigned #endif } + // send this event to deamon_status_file + nd_log_event_this = true; + int saved_errno = errno; size_t saved_winerror = 0; #if defined(OS_WINDOWS) @@ -467,7 +499,7 @@ void netdata_logger_fatal(const char *file, const char *function, const unsigned snprintfz(action_data, 70, "%04lu@%-10.10s:%-15.15s/%d", line, file, function, saved_errno); const char *thread_tag = nd_thread_tag(); - const char *tag_to_send = thread_tag; + const char *tag_to_send = thread_tag; // anonymize thread names if(strncmp(thread_tag, THREAD_TAG_STREAM_RECEIVER, strlen(THREAD_TAG_STREAM_RECEIVER)) == 0) @@ -475,8 +507,8 @@ void netdata_logger_fatal(const char *file, const char *function, const unsigned if(strncmp(thread_tag, THREAD_TAG_STREAM_SENDER, 
strlen(THREAD_TAG_STREAM_SENDER)) == 0) tag_to_send = THREAD_TAG_STREAM_SENDER; - char action_result[60+1]; - snprintfz(action_result, 60, "%s:%s", program_name, tag_to_send); + char action_result[200+1]; + snprintfz(action_result, 60, "%s:%s:%s", program_name, tag_to_send, function); #if !defined(ENABLE_SENTRY) && defined(HAVE_BACKTRACE) int fd = nd_log.sources[NDLS_DAEMON].fd; @@ -495,5 +527,5 @@ void netdata_logger_fatal(const char *file, const char *function, const unsigned abort(); #endif - netdata_cleanup_and_exit(1, "FATAL", action_result, action_data); + netdata_cleanup_and_exit(EXIT_REASON_FATAL, "FATAL", action_result, action_data); } diff --git a/src/libnetdata/log/nd_log.h b/src/libnetdata/log/nd_log.h index 24235d77d5f634..64ac52c50b035c 100644 --- a/src/libnetdata/log/nd_log.h +++ b/src/libnetdata/log/nd_log.h @@ -31,6 +31,10 @@ ND_LOG_FIELD_ID nd_log_field_id_by_journal_name(const char *field, size_t len); int nd_log_priority2id(const char *priority); const char *nd_log_id2priority(ND_LOG_FIELD_PRIORITY priority); const char *nd_log_method_for_external_plugins(const char *s); +ND_UUID nd_log_get_invocation_id(void); + +typedef void (*log_event_t)(const char *filename, const char *function, const char *message, const char *stack_trace, long line); +void nd_log_register_event_cb(log_event_t cb); int nd_log_health_fd(void); int nd_log_collectors_fd(void); diff --git a/src/libnetdata/log/nd_wevents_manifest.xml b/src/libnetdata/log/nd_wevents_manifest.xml deleted file mode 100644 index 9e326c1cbc9013..00000000000000 --- a/src/libnetdata/log/nd_wevents_manifest.xml +++ /dev/null @@ -1,295 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/libnetdata/log/systemd-cat-native.c b/src/libnetdata/log/systemd-cat-native.c index c7459c78a9d5cf..6c1df3828d7246 100644 --- 
a/src/libnetdata/log/systemd-cat-native.c +++ b/src/libnetdata/log/systemd-cat-native.c @@ -545,8 +545,8 @@ static int help(void) { " The parameter --newline=STRING allows setting the string to be replaced\n" " with newlines.\n" "\n" - " For example by setting --newline='--NEWLINE--', the program will replace\n" - " all occurrences of --NEWLINE-- with the newline character, within each\n" + " With the default setting of --newline='\\n', the program will replace\n" + " all occurrences of \\n with the newline character, within each\n" " VALUE of the KEY=VALUE lines. Once this this done, the program will\n" " switch the field to the binary Journal Export Format before sending the\n" " log event to systemd-journal.\n" @@ -741,7 +741,7 @@ int main(int argc, char *argv[]) { int timeout_ms = 0; // wait forever bool log_as_netdata = false; - const char *newline = NULL; + const char *newline = "\\n"; const char *namespace = NULL; const char *socket = getenv("NETDATA_SYSTEMD_JOURNAL_PATH"); #ifdef HAVE_LIBCURL diff --git a/src/libnetdata/log/systemd-cat-native.md b/src/libnetdata/log/systemd-cat-native.md index b0b15f403dbfc9..29e9c262da1309 100644 --- a/src/libnetdata/log/systemd-cat-native.md +++ b/src/libnetdata/log/systemd-cat-native.md @@ -34,19 +34,7 @@ printf "MESSAGE=hey, this is error\nPRIORITY=3\n\n" | systemd-cat-native The result: ![image](https://github.com/netdata/netdata/assets/2662304/faf3eaa5-ac56-415b-9de8-16e6ceed9280) -Sending multi-line log entries (in this example we replace the text `--NEWLINE--` with a newline in the log entry): - -```bash -printf "MESSAGE=hello--NEWLINE--world\nPRIORITY=6\n\n" | systemd-cat-native --newline='--NEWLINE--' -``` - -The result: - -![image](https://github.com/netdata/netdata/assets/2662304/d6037b4a-87da-4693-ae67-e07df0decdd9) - - -Processing the standard `\n` string can be tricky due to shell escaping. This works, but note that -we have to add a lot of backslashes to printf. 
+The program supports multi-line processing for all fields. The default newline sequence is `\n`. ```bash printf "MESSAGE=hello\\\\nworld\nPRIORITY=6\n\n" | systemd-cat-native --newline='\n' @@ -61,6 +49,17 @@ PRIORITY=6 ``` +It also allows changing the newline sequence. In this example we replace the text `--NEWLINE--` with a newline in the log entry: + +```bash +printf "MESSAGE=hello--NEWLINE--world\nPRIORITY=6\n\n" | systemd-cat-native --newline='--NEWLINE--' +``` + +The result: + +![image](https://github.com/netdata/netdata/assets/2662304/d6037b4a-87da-4693-ae67-e07df0decdd9) + + ## Best practices These are the rules about fields, enforced by `systemd-journald`: diff --git a/src/libnetdata/log/systemd-journal-helpers.c b/src/libnetdata/log/systemd-journal-helpers.c index 50505f788bd18a..bdb26fc0f82ea1 100644 --- a/src/libnetdata/log/systemd-journal-helpers.c +++ b/src/libnetdata/log/systemd-journal-helpers.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "systemd-journal-helpers.h" +#include "../libnetdata.h" bool is_path_unix_socket(const char *path) { // Check if the path is valid diff --git a/src/libnetdata/log/systemd-journal-helpers.h b/src/libnetdata/log/systemd-journal-helpers.h index a85f8e85a9ffa8..56ab2065a98dee 100644 --- a/src/libnetdata/log/systemd-journal-helpers.h +++ b/src/libnetdata/log/systemd-journal-helpers.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-3.0-or-later -#include "../libnetdata.h" +#include "../common.h" #ifndef NETDATA_LOG_SYSTEMD_JOURNAL_HELPERS_H #define NETDATA_LOG_SYSTEMD_JOURNAL_HELPERS_H diff --git a/src/libnetdata/log/wevt_netdata_install.bat b/src/libnetdata/log/wevt_netdata_install.bat index 51560759274b39..8ce4a10bf5c9b1 100644 --- a/src/libnetdata/log/wevt_netdata_install.bat +++ b/src/libnetdata/log/wevt_netdata_install.bat @@ -47,6 +47,13 @@ if %errorlevel% neq 0 ( exit /b 1 ) +echo. +echo Setting default event sizes... 
+wevtutil sl "Netdata/Daemon" /ms:104857600 +wevtutil sl "Netdata/Collectors" /ms:104857600 +wevtutil sl "Netdata/Health" /ms:104857600 +wevtutil sl "Netdata/Access" /ms:104857600 + echo. echo Netdata Event Tracing for Windows manifest installed successfully. exit /b 0 diff --git a/src/libnetdata/memory/nd-mallocz.c b/src/libnetdata/memory/nd-mallocz.c index 035a5e8a1ab879..f1ff78a219951f 100644 --- a/src/libnetdata/memory/nd-mallocz.c +++ b/src/libnetdata/memory/nd-mallocz.c @@ -387,6 +387,8 @@ void freez_int(void *ptr, const char *file, const char *function, size_t line) { #else ALWAYS_INLINE char *strdupz(const char *s) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_STRDUP); + char *t = strdup(s); if (unlikely(!t)) { OS_SYSTEM_MEMORY sm = os_last_reported_system_memory(); @@ -396,6 +398,8 @@ ALWAYS_INLINE char *strdupz(const char *s) { } ALWAYS_INLINE char *strndupz(const char *s, size_t len) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_STRNDUP); + char *t = strndup(s, len); if (unlikely(!t)) { OS_SYSTEM_MEMORY sm = os_last_reported_system_memory(); @@ -406,10 +410,14 @@ ALWAYS_INLINE char *strndupz(const char *s, size_t len) { // If ptr is NULL, no operation is performed. 
ALWAYS_INLINE void freez(void *ptr) { - if(likely(ptr)) free(ptr); + if(likely(ptr)) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_FREE); + free(ptr); + } } ALWAYS_INLINE void *mallocz(size_t size) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_MALLOC); void *p = malloc(size); if (unlikely(!p)) { OS_SYSTEM_MEMORY sm = os_last_reported_system_memory(); @@ -419,6 +427,7 @@ ALWAYS_INLINE void *mallocz(size_t size) { } ALWAYS_INLINE void *callocz(size_t nmemb, size_t size) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_CALLOC); void *p = calloc(nmemb, size); if (unlikely(!p)) { OS_SYSTEM_MEMORY sm = os_last_reported_system_memory(); @@ -428,6 +437,7 @@ ALWAYS_INLINE void *callocz(size_t nmemb, size_t size) { } ALWAYS_INLINE void *reallocz(void *ptr, size_t size) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_REALLOC); void *p = realloc(ptr, size); if (unlikely(!p)) { OS_SYSTEM_MEMORY sm = os_last_reported_system_memory(); @@ -436,7 +446,13 @@ ALWAYS_INLINE void *reallocz(void *ptr, size_t size) { return p; } -ALWAYS_INLINE void posix_memfree(void *ptr) { +ALWAYS_INLINE int posix_memalignz(void **memptr, size_t alignment, size_t size) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN); + return posix_memalign(memptr, alignment, size); +} + +ALWAYS_INLINE void posix_memalign_freez(void *ptr) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN_FREE); free(ptr); } #endif diff --git a/src/libnetdata/memory/nd-mallocz.h b/src/libnetdata/memory/nd-mallocz.h index b7bda24d06e3a3..8c83d2187a2cac 100644 --- a/src/libnetdata/memory/nd-mallocz.h +++ b/src/libnetdata/memory/nd-mallocz.h @@ -60,6 +60,8 @@ void freez(void *ptr); #endif // NETDATA_TRACE_ALLOCATIONS void mallocz_release_as_much_memory_to_the_system(void); -void posix_memfree(void *ptr); + +int posix_memalignz(void **memptr, size_t alignment, size_t size); +void posix_memalign_freez(void *ptr); #endif //NETDATA_ND_MALLOCZ_H diff --git a/src/libnetdata/memory/nd-mmap.c 
b/src/libnetdata/memory/nd-mmap.c index 9e0ca91f96f3d1..23599392accfb2 100644 --- a/src/libnetdata/memory/nd-mmap.c +++ b/src/libnetdata/memory/nd-mmap.c @@ -115,7 +115,7 @@ inline int madvise_mergeable(void *mem __maybe_unused, size_t len __maybe_unused #define THP_SIZE (2 * 1024 * 1024) // 2 MiB THP size #define THP_MASK (THP_SIZE - 1) // Mask for alignment check -inline int madvise_thp(void *mem, size_t len) { +inline int madvise_thp(void *mem __maybe_unused, size_t len __maybe_unused) { #ifdef MADV_HUGEPAGE // Check if the size is at least THP size and aligned if (len >= THP_SIZE && ((uintptr_t)mem & THP_MASK) == 0) { @@ -130,6 +130,7 @@ int nd_munmap(void *ptr, size_t size) { malloc_trace_munmap(size); #endif + workers_memory_call(WORKERS_MEMORY_CALL_MUNMAP); int rc = munmap(ptr, size); if(rc == 0) { @@ -141,6 +142,8 @@ int nd_munmap(void *ptr, size_t size) { } void *nd_mmap(void *addr, size_t len, int prot, int flags, int fd, off_t offset) { + workers_memory_call(WORKERS_MEMORY_CALL_MMAP); + void *rc = mmap(addr, len, prot, flags, fd, offset); if(rc != MAP_FAILED) { diff --git a/src/libnetdata/os/boot_id.c b/src/libnetdata/os/boot_id.c new file mode 100644 index 00000000000000..dea2113351c27b --- /dev/null +++ b/src/libnetdata/os/boot_id.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "boot_id.h" +#include "libnetdata/libnetdata.h" + +static ND_UUID cached_boot_id = { 0 }; +static SPINLOCK spinlock = SPINLOCK_INITIALIZER; + +#if defined(OS_LINUX) + +static ND_UUID get_boot_id(void) { + ND_UUID boot_id = { 0 }; + char buf[UUID_STR_LEN]; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, sizeof(filename), "%s/proc/sys/kernel/random/boot_id", + netdata_configured_host_prefix ? 
netdata_configured_host_prefix : ""); + + // Try reading the official boot_id first + if (read_txt_file(filename, buf, sizeof(buf)) == 0) { + if (uuid_parse(trim(buf), boot_id.uuid) == 0) + return boot_id; + } + + // Fallback to boottime-based ID + time_t boottime = os_boottime(); + if(boottime > 0) { + boot_id.parts.low64 = (uint64_t)boottime; + // parts.hig64 remains 0 to indicate this is a synthetic boot_id + } + + return boot_id; /* still all-zero when neither source was available */ +} + +#else // !OS_LINUX + +static ND_UUID get_boot_id(void) { + ND_UUID boot_id = { 0 }; + + time_t boottime = os_boottime(); + if(boottime > 0) { + boot_id.parts.low64 = (uint64_t)boottime; + // parts.hig64 remains 0 to indicate this is a synthetic boot_id + } + + return boot_id; +} + +#endif // OS_LINUX + +ND_UUID os_boot_id(void) { + // Fast path - return cached value if available + if(!UUIDiszero(cached_boot_id)) + return cached_boot_id; + + spinlock_lock(&spinlock); + + // Check again under lock in case another thread set it + if(UUIDiszero(cached_boot_id)) { + cached_boot_id = get_boot_id(); /* an all-zero result is not treated as cached, so the next call tries again */ + } + + spinlock_unlock(&spinlock); + return cached_boot_id; +} + +bool os_boot_ids_match(ND_UUID a, ND_UUID b) { + if(UUIDeq(a, b)) + return true; + + if(a.parts.hig64 == 0 && b.parts.hig64 == 0) { /* both IDs are synthetic, i.e. built from the boot time */ + uint64_t diff = a.parts.low64 > b.parts.low64 ? a.parts.low64 - b.parts.low64 : b.parts.low64 - a.parts.low64; + if(diff <= 3) /* low64 holds the os_boottime() value, so boot times up to 3 apart still match */ + return true; + } + + return false; +} diff --git a/src/libnetdata/os/boot_id.h b/src/libnetdata/os/boot_id.h new file mode 100644 index 00000000000000..7d7d19e6a7c08b --- /dev/null +++ b/src/libnetdata/os/boot_id.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_OS_BOOT_ID_H +#define NETDATA_OS_BOOT_ID_H + +#include "libnetdata/common.h" +#include "libnetdata/uuid/uuid.h" + +/** + * Get system boot ID + * + * Returns a UUID that remains constant during system uptime. + * On Linux, this is the kernel boot_id read from /proc/sys/kernel/random/boot_id; if it cannot be read or parsed, a boot-time based ID is used instead. 
+ * On other systems, this uses the system boot time to generate a unique ID. + * + * The value is cached after the first call that yields a non-zero ID. + * Returns UUID_ZERO on error. + * + * @return ND_UUID The boot ID + */ +ND_UUID os_boot_id(void); + +bool os_boot_ids_match(ND_UUID a, ND_UUID b); /* true when equal, or when both are boot-time based (hig64 == 0) and their low64 values differ by at most 3 */ + +#endif diff --git a/src/libnetdata/os/boottime.c b/src/libnetdata/os/boottime.c new file mode 100644 index 00000000000000..d40ea03fc0c52e --- /dev/null +++ b/src/libnetdata/os/boottime.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" + +static time_t cached_boottime = 0; /* 0 means "not resolved yet" (see os_boottime()) */ +static SPINLOCK spinlock = SPINLOCK_INITIALIZER; + +#if defined(OS_LINUX) + +static time_t calculate_boottime(void) { + char buf[8192]; /* NOTE(review): "btime" is only found if it lies within the bytes read into buf - confirm 8 KiB is enough for /proc/stat on hosts with many CPUs/interrupts */ + + char filename[FILENAME_MAX + 1]; + + // Try to read from /proc/stat first - this provides the absolute timestamp + snprintfz(filename, sizeof(filename), "%s/proc/stat", + netdata_configured_host_prefix ? netdata_configured_host_prefix : ""); + if (read_txt_file(filename, buf, sizeof(buf)) == 0) { + char *btime_line = strstr(buf, "btime "); + if (btime_line) { + time_t btime = (time_t)str2ull(btime_line + 6, NULL); /* 6 == strlen("btime ") */ + if (btime > 0) + return btime; + } + } + + // If btime is not available, calculate it from uptime + snprintfz(filename, sizeof(filename), "%s/proc/uptime", + netdata_configured_host_prefix ? 
netdata_configured_host_prefix : ""); + if (read_txt_file(filename, buf, sizeof(buf)) == 0) { + double uptime; + if (sscanf(buf, "%lf", &uptime) == 1) { /* only the first number of the file is used */ + time_t now = now_realtime_sec(); + time_t boottime = now - (time_t)uptime; + if(boottime > 0) + return boottime; + } + } + + return 0; +} + +#elif defined(OS_FREEBSD) || defined(OS_MACOS) + +#include + +static time_t calculate_boottime(void) { + struct timeval boottime; + size_t size = sizeof(boottime); + + // kern.boottime provides the absolute timestamp + if (sysctlbyname("kern.boottime", &boottime, &size, NULL, 0) == 0) + return boottime.tv_sec; + + return 0; +} + +#elif defined(OS_WINDOWS) + +#include + +static time_t calculate_boottime(void) { + ULONGLONG uptime_ms = GetTickCount64(); + if (uptime_ms > 0) { + FILETIME ft; + ULARGE_INTEGER now; + + GetSystemTimeAsFileTime(&ft); + now.HighPart = ft.dwHighDateTime; + now.LowPart = ft.dwLowDateTime; + + // Convert to Unix epoch (subtract Windows epoch) + ULONGLONG unix_time_ms = (now.QuadPart - 116444736000000000ULL) / 10000; /* 100ns intervals -> milliseconds */ + time_t boottime = (time_t)((unix_time_ms - uptime_ms) / 1000); /* current time minus uptime, in seconds */ + + if(boottime > 0) + return boottime; + } + + return 0; +} + +#endif + +static time_t get_stable_boottime(void) { /* samples the boot time until required_matches consecutive samples agree; returns 0 when max_attempts is exhausted first */ + const int max_attempts = 100; + const int required_matches = 5; + time_t last_boottime = 0; + int matches = 0; + + for(int i = 0; i < max_attempts; i++) { + time_t new_boottime = calculate_boottime(); + if(new_boottime == 0) + new_boottime = now_realtime_sec() - now_boottime_sec(); /* fallback when the OS-specific method returned 0 */ + + if(new_boottime == last_boottime) + matches++; + else { + matches = 1; /* a different value restarts the run of equal samples */ + last_boottime = new_boottime; + } + + if(matches >= required_matches) + return new_boottime; + + microsleep(1000); // 1ms + } + + return 0; +} + +time_t os_boottime(void) { + // Fast path - return cached value if available + if(cached_boottime > 0) + return cached_boottime; + + spinlock_lock(&spinlock); + + // Check again under lock in case another thread set it + if(cached_boottime == 0) + cached_boottime = 
get_stable_boottime(); + + spinlock_unlock(&spinlock); + return cached_boottime; +} diff --git a/src/libnetdata/os/boottime.h b/src/libnetdata/os/boottime.h new file mode 100644 index 00000000000000..51c0b22b1d0813 --- /dev/null +++ b/src/libnetdata/os/boottime.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_BOOTTIME_H +#define NETDATA_BOOTTIME_H + +#include "libnetdata/common.h" + +/** + * Get system boot time + * + * Returns the absolute wallclock timestamp (Unix epoch) of when the system was last booted. + * The value is cached after first successful call. + * Returns 0 on error; a zero result is not cached, so the lookup is retried on the next call. + * + * @return time_t The boot timestamp, 0 on error + */ +time_t os_boottime(void); + +#endif //NETDATA_BOOTTIME_H diff --git a/src/libnetdata/os/disk_space.c b/src/libnetdata/os/disk_space.c new file mode 100644 index 00000000000000..765d7e41d78f9b --- /dev/null +++ b/src/libnetdata/os/disk_space.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" + +#if defined(OS_LINUX) +#include + +OS_SYSTEM_DISK_SPACE os_disk_space(const char *path) { + OS_SYSTEM_DISK_SPACE space = OS_SYSTEM_DISK_SPACE_EMPTY; + struct statvfs buf; + + if (statvfs(path, &buf) != 0) { + // Error occurred; errno is set + return space; /* still OS_SYSTEM_DISK_SPACE_EMPTY, so OS_SYSTEM_DISK_SPACE_OK() is false */ + } + + // Use f_frsize (fragment size) for accurate byte calculations. 
+ space.total_bytes = buf.f_blocks * buf.f_frsize; + space.free_bytes = buf.f_bavail * buf.f_frsize; + space.total_inodes = buf.f_files; + space.free_inodes = buf.f_favail; + space.is_read_only = (buf.f_flag & ST_RDONLY) != 0; + return space; +} +#endif + +#if defined(OS_FREEBSD) || defined(OS_MACOS) +#include +#include + +OS_SYSTEM_DISK_SPACE os_disk_space(const char *path) { + OS_SYSTEM_DISK_SPACE space = OS_SYSTEM_DISK_SPACE_EMPTY; + struct statfs buf; + + if (statfs(path, &buf) != 0) { + // Error occurred; errno is set + return space; + } + + space.total_bytes = buf.f_blocks * buf.f_bsize; + space.free_bytes = buf.f_bavail * buf.f_bsize; + space.total_inodes = buf.f_files; + space.free_inodes = buf.f_ffree; + space.is_read_only = (buf.f_flags & MNT_RDONLY) != 0; + return space; +} +#endif + +#if defined(OS_WINDOWS) +#include + +OS_SYSTEM_DISK_SPACE os_disk_space(const char *path_utf8) { + OS_SYSTEM_DISK_SPACE space = OS_SYSTEM_DISK_SPACE_EMPTY; + + // Convert the UTF-8 path to a wide-character string. + int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path_utf8, -1, NULL, 0); + if (wlen == 0) { + // Conversion error; optionally, GetLastError() can provide more details. + return space; + } + + wchar_t *wpath = (wchar_t *)mallocz(wlen * sizeof(wchar_t)); + + if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path_utf8, -1, wpath, wlen) == 0) { + // Conversion error. + freez(wpath); + return space; + } + + // Use the wide-character version of GetDiskFreeSpaceEx. + ULARGE_INTEGER freeBytesAvailable, totalNumberOfBytes, totalNumberOfFreeBytes; + if (!GetDiskFreeSpaceExW(wpath, &freeBytesAvailable, &totalNumberOfBytes, &totalNumberOfFreeBytes)) { + // API call failed; optionally, GetLastError() can provide more details. 
+ freez(wpath); + return space; + } + + // Read the attributes of the path itself (GetFileAttributesW) + DWORD attributes = GetFileAttributesW(wpath); + if (attributes != INVALID_FILE_ATTRIBUTES) { + space.is_read_only = (attributes & FILE_ATTRIBUTE_READONLY) != 0; /* NOTE(review): this is the READONLY attribute of the given path, which may differ from the volume being read-only - confirm */ + } + + freez(wpath); + + space.total_bytes = totalNumberOfBytes.QuadPart; + space.free_bytes = totalNumberOfFreeBytes.QuadPart; /* NOTE(review): freeBytesAvailable is fetched but unused - confirm total free bytes is what is wanted here */ + space.total_inodes = 0; // Windows does not have inodes + space.free_inodes = 0; // Windows does not have inodes + return space; +} +#endif diff --git a/src/libnetdata/os/disk_space.h b/src/libnetdata/os/disk_space.h new file mode 100644 index 00000000000000..1c4966c1777979 --- /dev/null +++ b/src/libnetdata/os/disk_space.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DISK_SPACE_H +#define NETDATA_DISK_SPACE_H + +#include "libnetdata/libnetdata.h" + +typedef struct { + uint64_t total_bytes; // Total disk size in bytes + uint64_t free_bytes; // Available disk space in bytes + uint64_t total_inodes; // Total number of inodes + uint64_t free_inodes; // Available inodes + bool is_read_only; // True if filesystem is read-only +} OS_SYSTEM_DISK_SPACE; + +#define OS_SYSTEM_DISK_SPACE_OK(space) ((space).total_bytes > 0) /* a failed query leaves total_bytes at 0 */ +#define OS_SYSTEM_DISK_SPACE_EMPTY (OS_SYSTEM_DISK_SPACE){ 0 } + +OS_SYSTEM_DISK_SPACE os_disk_space(const char *path); /* returns OS_SYSTEM_DISK_SPACE_EMPTY on failure; check with OS_SYSTEM_DISK_SPACE_OK() */ + +#endif //NETDATA_DISK_SPACE_H diff --git a/src/libnetdata/os/file_lock.c b/src/libnetdata/os/file_lock.c new file mode 100644 index 00000000000000..9cd19b6be5f2a7 --- /dev/null +++ b/src/libnetdata/os/file_lock.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "file_lock.h" + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) +#include +#include +#include +#endif + +#if defined(OS_WINDOWS) +#include +#endif + +FILE_LOCK file_lock_get(const char *filename) { + if(!filename || !*filename) + return FILE_LOCK_INVALID; + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) + // Try 
to create a new file, or open existing one + int fd = open(filename, O_RDWR | O_CREAT, 0666); + if(fd == -1) + return FILE_LOCK_INVALID; + + // LOCK_NB makes flock() non-blocking + if(flock(fd, LOCK_EX | LOCK_NB) == -1) { + close(fd); + return FILE_LOCK_INVALID; + } + + return (FILE_LOCK){ .fd = fd }; + +#elif defined(OS_WINDOWS) + // Convert MSYS2/Cygwin path directly to Windows wide-char path + ssize_t wpath_size = cygwin_conv_path(CCP_POSIX_TO_WIN_W, filename, NULL, 0); + if(wpath_size < 0) + return FILE_LOCK_INVALID; + + wchar_t *wpath = mallocz(wpath_size); + if(!wpath) + return FILE_LOCK_INVALID; + + if(cygwin_conv_path(CCP_POSIX_TO_WIN_W, filename, wpath, wpath_size) != 0) { + freez(wpath); + return FILE_LOCK_INVALID; + } + + // Open existing file or create new one + HANDLE hFile = CreateFileW( + wpath, + GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, + OPEN_ALWAYS, // Open if exists, create if doesn't + FILE_ATTRIBUTE_NORMAL, + NULL + ); + + freez(wpath); + + if(hFile == INVALID_HANDLE_VALUE) + return FILE_LOCK_INVALID; + + // Check if file is empty + LARGE_INTEGER size; + if(!GetFileSizeEx(hFile, &size)) { + CloseHandle(hFile); + return FILE_LOCK_INVALID; + } + + // Write a byte only if file is empty + if(size.QuadPart == 0) { + DWORD written; + if(!WriteFile(hFile, "!", 1, &written, NULL) || written != 1) { + CloseHandle(hFile); + return FILE_LOCK_INVALID; + } + } + + // Try to lock the entire file + OVERLAPPED overlapped = {0}; + if(!LockFileEx( + hFile, + LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, + 0, + MAXDWORD, + MAXDWORD, + &overlapped)) { + CloseHandle(hFile); + return FILE_LOCK_INVALID; + } + + return (FILE_LOCK){ .handle = hFile }; + +#else +#error "Unsupported operating system" +#endif +} + +void file_lock_release(FILE_LOCK lock) { +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) + if(FILE_LOCK_OK(lock)) { + // flock is automatically released when file is closed + close(lock.fd); + } 
+#elif defined(OS_WINDOWS) + if(FILE_LOCK_OK(lock)) { + // File lock is automatically released when handle is closed + CloseHandle(lock.handle); + } +#endif +} diff --git a/src/libnetdata/os/file_lock.h b/src/libnetdata/os/file_lock.h new file mode 100644 index 00000000000000..f53903a2f97f98 --- /dev/null +++ b/src/libnetdata/os/file_lock.h @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_FILE_LOCK_H +#define NETDATA_FILE_LOCK_H + +#include "libnetdata/libnetdata.h" + +typedef struct file_lock { +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) + int fd; /* descriptor that holds the flock(); -1 when invalid */ +#elif defined(OS_WINDOWS) + HANDLE handle; /* handle that holds the LockFileEx() lock; INVALID_HANDLE_VALUE when invalid */ +#else +#error "Unsupported operating system" +#endif +} FILE_LOCK; + +// Initialize to invalid values +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) +#define FILE_LOCK_INVALID ((FILE_LOCK){ .fd = -1 }) +#define FILE_LOCK_OK(lock) ((lock).fd != -1) +#elif defined(OS_WINDOWS) +#define FILE_LOCK_INVALID ((FILE_LOCK){ .handle = INVALID_HANDLE_VALUE }) +#define FILE_LOCK_OK(lock) ((lock).handle != INVALID_HANDLE_VALUE) +#endif + +/** + * Get a file lock + * + * Attempts to acquire an exclusive lock on a file. The lock is automatically released + * when the process exits or if the process crashes. Only one process can hold the lock + * at a time. The attempt never blocks: if the lock cannot be taken immediately, the returned value fails FILE_LOCK_OK(). The file is created when it does not exist. + * + * @param filename UTF-8 encoded filename (MSYS2 path format on Windows); NULL or empty yields an invalid lock + * @return FILE_LOCK The lock handle. Use FILE_LOCK_OK() to check if lock was acquired + */ +FILE_LOCK file_lock_get(const char *filename); + +/** + * Release a file lock + * + * Releases a previously acquired file lock. After calling this function, + * another process may acquire the lock. Passing an invalid lock is a no-op. 
+ * + * @param lock The lock to release + */ +void file_lock_release(FILE_LOCK lock); + +#endif //NETDATA_FILE_LOCK_H diff --git a/src/libnetdata/os/file_metadata.c b/src/libnetdata/os/file_metadata.c new file mode 100644 index 00000000000000..9ba223fa1527b0 --- /dev/null +++ b/src/libnetdata/os/file_metadata.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" +#include + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) +#include + +OS_FILE_METADATA os_get_file_metadata(const char *path) { + OS_FILE_METADATA metadata = {0}; + struct stat st; + + if (stat(path, &st) != 0) /* on failure the zeroed metadata is returned, which fails OS_FILE_METADATA_OK() */ + return metadata; + + metadata.size_bytes = st.st_size; + metadata.modified_time = st.st_mtime; + return metadata; +} +#endif + +#if defined(OS_WINDOWS) +#include + +OS_FILE_METADATA os_get_file_metadata(const char *path) { + OS_FILE_METADATA metadata = {0}; + + // Convert UTF-8 path to wide-character string + int wlen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path, -1, NULL, 0); /* first pass only computes the required length */ + if (wlen == 0) + return metadata; + + wchar_t *wpath = (wchar_t *)mallocz(wlen * sizeof(wchar_t)); + if (!wpath) + return metadata; + + if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, path, -1, wpath, wlen) == 0) { + freez(wpath); + return metadata; + } + + WIN32_FILE_ATTRIBUTE_DATA attr_data; + if (!GetFileAttributesExW(wpath, GetFileExInfoStandard, &attr_data)) { + freez(wpath); + return metadata; + } + + freez(wpath); /* the wide path is no longer needed once the attributes have been read */ + + // Combine high and low parts for 64-bit file size + ULARGE_INTEGER file_size; + file_size.HighPart = attr_data.nFileSizeHigh; + file_size.LowPart = attr_data.nFileSizeLow; + metadata.size_bytes = file_size.QuadPart; + + // Convert Windows FILETIME to Unix timestamp + // Windows FILETIME is in 100-nanosecond intervals since January 1, 1601 UTC + // Need to convert to seconds since January 1, 1970 UTC + ULARGE_INTEGER win_time; + win_time.HighPart = attr_data.ftLastWriteTime.dwHighDateTime; + win_time.LowPart = 
attr_data.ftLastWriteTime.dwLowDateTime; + + // Subtract Windows epoch start (January 1, 1601 UTC) + // Add Unix epoch start (January 1, 1970 UTC) + // Convert from 100-nanosecond intervals to seconds + const uint64_t WINDOWS_TICK = 10000000; /* number of 100ns intervals in one second */ + const uint64_t SEC_TO_UNIX_EPOCH = 11644473600LL; /* seconds between 1601-01-01 and 1970-01-01 */ + metadata.modified_time = (time_t)((win_time.QuadPart / WINDOWS_TICK) - SEC_TO_UNIX_EPOCH); /* ticks -> seconds first, then shift to the Unix epoch */ + + return metadata; +} +#endif \ No newline at end of file diff --git a/src/libnetdata/os/file_metadata.h b/src/libnetdata/os/file_metadata.h new file mode 100644 index 00000000000000..64a76c0db62cec --- /dev/null +++ b/src/libnetdata/os/file_metadata.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_FILE_METADATA_H +#define NETDATA_FILE_METADATA_H + +#include "libnetdata/libnetdata.h" +#include +#include + +typedef struct { + uint64_t size_bytes; // File size in bytes + time_t modified_time; // Last modification time (Unix timestamp) +} OS_FILE_METADATA; + +OS_FILE_METADATA os_get_file_metadata(const char *path); /* returns a zeroed struct when the file cannot be examined */ + +#define OS_FILE_METADATA_OK(metadata) ((metadata).modified_time > 0 && (metadata).size_bytes > 0) /* note: an existing but empty file (size 0) is also reported as not OK */ + +#endif //NETDATA_FILE_METADATA_H \ No newline at end of file diff --git a/src/libnetdata/os/os.h b/src/libnetdata/os/os.h index ee01b18a211a04..a27d32a76a90d1 100644 --- a/src/libnetdata/os/os.h +++ b/src/libnetdata/os/os.h @@ -32,6 +32,13 @@ #include "system-maps/cache-host-users-and-groups.h" #include "system-maps/cached-sid-username.h" #include "windows-perflib/perflib.h" +#include "disk_space.h" +#include "file_metadata.h" +#include "process_path.h" +#include "boottime.h" +#include "boot_id.h" +#include "run_dir.h" +#include "file_lock.h" // this includes windows.h to the whole of netdata // so various conflicts arise diff --git a/src/libnetdata/os/process_path.c b/src/libnetdata/os/process_path.c new file mode 100644 index 00000000000000..4dbad9805a8b64 --- /dev/null +++ b/src/libnetdata/os/process_path.c @@ -0,0 +1,90 @@ +// 
SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata/libnetdata.h" + +#if defined(OS_LINUX) +#include + +char *os_get_process_path(void) { + char path[PATH_MAX + 1] = ""; + ssize_t len = readlink("/proc/self/exe", path, PATH_MAX); /* at most PATH_MAX bytes are written, leaving room for the terminator added below */ + + if (len < 0) { + // Error occurred; errno is set + return NULL; + } + + path[len] = '\0'; // readlink doesn't null terminate + return strdupz(path); +} +#endif + +#if defined(OS_FREEBSD) +#include + +char *os_get_process_path(void) { + char path[PATH_MAX + 1] = ""; + int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 }; /* presumably -1 selects the calling process - see sysctl(3), KERN_PROC_PATHNAME */ + size_t len = sizeof(path); + + if (sysctl(mib, 4, path, &len, NULL, 0) == -1) { + // Error occurred; errno is set + return NULL; + } + + return strdupz(path); +} +#endif + +#if defined(OS_MACOS) +#include + +char *os_get_process_path(void) { + char path[PATH_MAX + 1] = ""; + uint32_t size = sizeof(path); + + if (_NSGetExecutablePath(path, &size) != 0) { + // Buffer too small + return NULL; + } + + // Resolve any symlinks to get the real path + char real_path[PATH_MAX + 1] = ""; + if (!realpath(path, real_path)) { + // Error occurred; errno is set + return NULL; + } + + return strdupz(real_path); +} +#endif + +#if defined(OS_WINDOWS) +#include + +char *os_get_process_path(void) { + wchar_t wpath[32768] = L""; // Maximum path length in Windows + DWORD length = GetModuleFileNameW(NULL, wpath, sizeof(wpath)/sizeof(wpath[0])); /* the size argument is in characters, not bytes */ + + if (length == 0) { + // GetModuleFileName failed + return NULL; + } + + // Convert wide string to UTF-8 + int utf8_len = WideCharToMultiByte(CP_UTF8, 0, wpath, -1, NULL, 0, NULL, NULL); /* first pass only computes the required size, terminator included */ + if (utf8_len == 0) { + // Conversion error + return NULL; + } + + char *path = mallocz(utf8_len); + if (WideCharToMultiByte(CP_UTF8, 0, wpath, -1, path, utf8_len, NULL, NULL) == 0) { + // Conversion error + freez(path); + return NULL; + } + + return path; /* ownership passes to the caller */ +} +#endif diff --git a/src/libnetdata/os/process_path.h b/src/libnetdata/os/process_path.h new file mode 100644 index 
00000000000000..c2416fabc569e1 --- /dev/null +++ b/src/libnetdata/os/process_path.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PROCESS_PATH_H +#define NETDATA_PROCESS_PATH_H + +#include "libnetdata/libnetdata.h" + +// Get the full path of the current process executable +// Returns a string allocated with strdupz()/mallocz() that the caller must release with freez() +// Returns NULL on error +char *os_get_process_path(void); + +#endif //NETDATA_PROCESS_PATH_H \ No newline at end of file diff --git a/src/libnetdata/os/run_dir.c b/src/libnetdata/os/run_dir.c new file mode 100644 index 00000000000000..2a09cf98ac2ef6 --- /dev/null +++ b/src/libnetdata/os/run_dir.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "run_dir.h" +#include "libnetdata/libnetdata.h" + +static char *cached_run_dir = NULL; /* set once by os_run_dir(); not freed anywhere in this file */ +static SPINLOCK spinlock = SPINLOCK_INITIALIZER; + +static inline bool is_dir_accessible(const char *dir, bool rw) { /* true when dir exists, is a directory and grants the requested access */ + struct stat st; + if (stat(dir, &st) == -1) + return false; + + if (!S_ISDIR(st.st_mode)) + return false; + + // Check the requested access: write when rw is true, read otherwise + if (access(dir, rw ? 
W_OK : R_OK) == -1) + return false; + + return true; +} + +static inline bool netdata_dir_in_parent(const char *parent, char *out_path, size_t out_path_len, bool rw) { /* writes "<parent>/netdata" into out_path; true when that directory is usable with the requested access */ + if (!is_dir_accessible(parent, rw)) + return false; + + snprintfz(out_path, out_path_len, "%s/netdata", parent); + if (rw && mkdir(out_path, 0755) == -1 && errno != EEXIST) /* create only in rw mode, matching the /tmp fallback in detect_run_dir() and the os_run_dir() contract */ + return false; + + return is_dir_accessible(out_path, rw); /* in read-only mode this also rejects a directory that does not exist */ +} + +static char *detect_run_dir(bool rw) { + char path[FILENAME_MAX + 1]; + + if(!rw) { /* only read-only callers honour the directory exported through the environment */ + // First check for environment variable + const char *env_dir = getenv("NETDATA_RUN_DIR"); + if (env_dir && *env_dir) { + if (is_dir_accessible(env_dir, rw)) + return strdupz(env_dir); + } + } + +#if defined(OS_LINUX) + // First try /run/netdata + if (netdata_dir_in_parent("/run", path, sizeof(path), rw)) + goto success; +#endif + +#if defined(OS_MACOS) + // macOS typically uses /private/var/run + if (netdata_dir_in_parent("/private/var/run", path, sizeof(path), rw)) + goto success; +#endif + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_MACOS) + // Then try /var/run/netdata + if (netdata_dir_in_parent("/var/run", path, sizeof(path), rw)) + goto success; +#endif + +//#if defined(OS_WINDOWS) +// // On MSYS2/Cygwin get TEMP and convert it properly +// WCHAR temp_pathW[MAX_PATH]; +// DWORD len = GetEnvironmentVariableW(L"TEMP", temp_pathW, MAX_PATH); +// if (len > 0 && len < MAX_PATH) { +// // Convert Windows wide path to UTF-8 +// int utf8_len = WideCharToMultiByte(CP_UTF8, 0, temp_pathW, -1, NULL, 0, NULL, NULL); +// if (utf8_len > 0 && utf8_len < FILENAME_MAX) { +// char win_path[FILENAME_MAX + 1]; +// if (WideCharToMultiByte(CP_UTF8, 0, temp_pathW, -1, win_path, sizeof(win_path), NULL, NULL)) { +// // Convert Windows path to Unix path using Cygwin API +// ssize_t unix_size = cygwin_conv_path(CCP_WIN_A_TO_POSIX, win_path, NULL, 0); +// if (unix_size > 0) { +// char unix_path[FILENAME_MAX + 1]; +// if (cygwin_conv_path(CCP_WIN_A_TO_POSIX, win_path, unix_path, 
sizeof(unix_path)) == 0) { +// if (is_dir_accessible(unix_path, rw)) { +// snprintfz(path, sizeof(path), "%s/netdata", unix_path); +// if (!rw) +// goto success; +// +// if (mkdir(path, 0755) == 0 || errno == EEXIST) +// goto success; +// } +// } +// } +// } +// } +// } +//#endif + + // Fallback to /tmp/netdata - force creation if needed + if (!is_dir_accessible("/tmp", rw)) { + // Try to create /tmp with standard permissions (including sticky bit) + if (rw && mkdir("/tmp", 01777) == -1 && errno != EEXIST) + return NULL; + } + + snprintfz(path, sizeof(path), "/tmp/netdata"); + if (rw && mkdir(path, 0755) == -1 && errno != EEXIST) /* in read-only mode the fallback path is returned without checking that it exists */ + return NULL; + +success: + // Set the environment variable for child processes + if(rw) + setenv("NETDATA_RUN_DIR", path, 1); + + return strdupz(path); +} + +const char *os_run_dir(bool rw) { + // Fast path - return cached directory if available + if(cached_run_dir) + return cached_run_dir; + + spinlock_lock(&spinlock); + + // Check again under lock in case another thread set it + if(!cached_run_dir) + cached_run_dir = detect_run_dir(rw); /* a NULL result is not cached, so the next call runs the detection again */ + + spinlock_unlock(&spinlock); + + return cached_run_dir; +} diff --git a/src/libnetdata/os/run_dir.h b/src/libnetdata/os/run_dir.h new file mode 100644 index 00000000000000..e5e16a80ff73bb --- /dev/null +++ b/src/libnetdata/os/run_dir.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RUN_DIR_H +#define NETDATA_RUN_DIR_H + +#include "libnetdata/libnetdata.h" + +/** + * Initialize and get the runtime directory for Netdata + * This function gets or creates the runtime directory based on environment or system defaults + * + * @param rw When true, create the directory if it doesn't exist and export it as NETDATA_RUN_DIR; when false, NETDATA_RUN_DIR is consulted first + * @return const char* The runtime directory path, or NULL when none could be set up; the first non-NULL result is cached and returned by all later calls regardless of rw + */ +const char *os_run_dir(bool rw); + +#endif //NETDATA_RUN_DIR_H diff --git a/src/libnetdata/os/system_memory.c b/src/libnetdata/os/system_memory.c index b2bb6066485462..7c36d20a1fe81b 100644 --- a/src/libnetdata/os/system_memory.c +++ 
b/src/libnetdata/os/system_memory.c @@ -15,7 +15,7 @@ OS_SYSTEM_MEMORY os_last_reported_system_memory(void) { #include OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram __maybe_unused) { - OS_SYSTEM_MEMORY sm = {0, 0}; + OS_SYSTEM_MEMORY sm = OS_SYSTEM_MEMORY_EMPTY; /* all fields zero, so OS_SYSTEM_MEMORY_OK() is false until filled in */ MEMORYSTATUSEX statex; statex.dwLength = sizeof(statex); @@ -29,55 +29,11 @@ OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram __maybe_unused) { } #endif -// macOS -#if defined(OS_MACOS) -#include -#include - -OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram) { - static uint64_t total_ram = 0; - static uint64_t page_size = 0; - - if (page_size == 0) { - size_t len = sizeof(page_size); - if (sysctlbyname("hw.pagesize", &page_size, &len, NULL, 0) != 0) - return (OS_SYSTEM_MEMORY){ 0, 0 }; - } - - if (query_total_ram || total_ram == 0) { - size_t len = sizeof(total_ram); - if (sysctlbyname("hw.memsize", &total_ram, &len, NULL, 0) != 0) - return (OS_SYSTEM_MEMORY){ 0, 0 }; - } - - uint64_t ram_available = 0; - if (page_size > 0) { - vm_statistics64_data_t vm_info; - mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; - mach_port_t mach_port = mach_host_self(); - - if (host_statistics64(mach_port, HOST_VM_INFO64, (host_info_t)&vm_info, &count) != KERN_SUCCESS) { - mach_port_deallocate(mach_task_self(), mach_port); - return (OS_SYSTEM_MEMORY){0, 0}; - } - - ram_available = (vm_info.free_count + vm_info.inactive_count + vm_info.purgeable_count) * page_size; - mach_port_deallocate(mach_task_self(), mach_port); - } - - os_system_memory_last = (OS_SYSTEM_MEMORY){ - .ram_total_bytes = total_ram, - .ram_available_bytes = ram_available, - }; - return os_system_memory_last; -} -#endif - // Linux #if defined(OS_LINUX) static OS_SYSTEM_MEMORY os_system_memory_cgroup_v1(bool query_total_ram __maybe_unused) { - static OS_SYSTEM_MEMORY sm = {0, 0}; + static OS_SYSTEM_MEMORY sm = OS_SYSTEM_MEMORY_EMPTY; /* NOTE(review): the macro expands to a compound literal; initializing a static object with it relies on a GCC/Clang extension - confirm all supported compilers accept it */ char buf[4096]; uint64_t used = 0, inactive = 0; @@ -118,13 +74,12 @@ static OS_SYSTEM_MEMORY 
os_system_memory_cgroup_v1(bool query_total_ram __maybe_ return sm; failed: - sm.ram_total_bytes = 0; - sm.ram_available_bytes = 0; + sm = OS_SYSTEM_MEMORY_EMPTY; /* reset the static value so the failure is reported as empty */ return sm; } static OS_SYSTEM_MEMORY os_system_memory_cgroup_v2(bool query_total_ram __maybe_unused) { - static OS_SYSTEM_MEMORY sm = {0, 0}; + static OS_SYSTEM_MEMORY sm = OS_SYSTEM_MEMORY_EMPTY; char buf[4096]; uint64_t used = 0, inactive = 0; @@ -169,8 +124,7 @@ static OS_SYSTEM_MEMORY os_system_memory_cgroup_v2(bool query_total_ram __maybe_ return sm; failed: - sm.ram_total_bytes = 0; - sm.ram_available_bytes = 0; + sm = OS_SYSTEM_MEMORY_EMPTY; /* reset the static value so the failure is reported as empty */ return sm; } @@ -178,7 +132,7 @@ static OS_SYSTEM_MEMORY os_system_memory_cgroup_v2(bool query_total_ram __maybe_ #define MEMINFO_MEMAVAILABLE "MemAvailable:" static OS_SYSTEM_MEMORY os_system_memory_meminfo(bool query_total_ram __maybe_unused) { - static OS_SYSTEM_MEMORY sm = {0, 0}; + static OS_SYSTEM_MEMORY sm = OS_SYSTEM_MEMORY_EMPTY; char buf[4096]; if (read_txt_file("/proc/meminfo", buf, sizeof(buf)) != 0) @@ -212,7 +166,7 @@ typedef enum { } OS_MEM_SRC; OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram __maybe_unused) { - static OS_SYSTEM_MEMORY sm = {0, 0}; + static OS_SYSTEM_MEMORY sm = OS_SYSTEM_MEMORY_EMPTY; static usec_t last_ut = 0, last_total_ut = 0; static OS_MEM_SRC src = OS_MEM_SRC_UNKNOWN; @@ -284,7 +238,7 @@ OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram __maybe_unused) { #include OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram) { - static OS_SYSTEM_MEMORY sm = {0, 0}; + static OS_SYSTEM_MEMORY sm = OS_SYSTEM_MEMORY_EMPTY; // Query the total RAM only if needed or if it hasn't been cached if (query_total_ram || sm.ram_total_bytes == 0) { @@ -319,8 +273,51 @@ OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram) { return sm; failed: - sm.ram_total_bytes = 0; - sm.ram_available_bytes = 0; + sm = OS_SYSTEM_MEMORY_EMPTY; /* also clears the cached total, so it is queried again on the next call */ return sm; } #endif + +// macOS +#if defined(OS_MACOS) +#include +#include + +OS_SYSTEM_MEMORY 
os_system_memory(bool query_total_ram) { + static uint64_t total_ram = 0; /* cached between calls; refreshed when query_total_ram is set */ + static uint64_t page_size = 0; /* queried once and then reused */ + + if (page_size == 0) { + size_t len = sizeof(page_size); + if (sysctlbyname("hw.pagesize", &page_size, &len, NULL, 0) != 0) + return OS_SYSTEM_MEMORY_EMPTY; + } + + if (query_total_ram || total_ram == 0) { + size_t len = sizeof(total_ram); + if (sysctlbyname("hw.memsize", &total_ram, &len, NULL, 0) != 0) + return OS_SYSTEM_MEMORY_EMPTY; + } + + uint64_t ram_available = 0; + if (page_size > 0) { + vm_statistics64_data_t vm_info; + mach_msg_type_number_t count = HOST_VM_INFO64_COUNT; + mach_port_t mach_port = mach_host_self(); + + if (host_statistics64(mach_port, HOST_VM_INFO64, (host_info_t)&vm_info, &count) != KERN_SUCCESS) { + mach_port_deallocate(mach_task_self(), mach_port); /* release the host port on the failure path too */ + return OS_SYSTEM_MEMORY_EMPTY; + } + + ram_available = (vm_info.free_count + vm_info.inactive_count + vm_info.purgeable_count) * page_size; /* free + inactive + purgeable pages, converted to bytes */ + mach_port_deallocate(mach_task_self(), mach_port); + } + + os_system_memory_last = (OS_SYSTEM_MEMORY){ + .ram_total_bytes = total_ram, + .ram_available_bytes = ram_available, + }; + return os_system_memory_last; +} +#endif diff --git a/src/libnetdata/os/system_memory.h b/src/libnetdata/os/system_memory.h index 2695413ace7008..04cb4ea67e850d 100644 --- a/src/libnetdata/os/system_memory.h +++ b/src/libnetdata/os/system_memory.h @@ -20,6 +20,9 @@ typedef struct { uint64_t ram_available_bytes; } OS_SYSTEM_MEMORY; +#define OS_SYSTEM_MEMORY_OK(mem) ((mem).ram_total_bytes > 0) /* true when a total RAM figure is present */ +#define OS_SYSTEM_MEMORY_EMPTY (OS_SYSTEM_MEMORY){ 0 } /* compound literal with every field zero */ + // The function to get current system memory: OS_SYSTEM_MEMORY os_system_memory(bool query_total_ram); diff --git a/src/libnetdata/required_dummies.h b/src/libnetdata/required_dummies.h index cff4c563a5a375..14d7969d4e97a6 100644 --- a/src/libnetdata/required_dummies.h +++ b/src/libnetdata/required_dummies.h @@ -4,13 +4,13 @@ #define NETDATA_LIB_DUMMIES_H 1 // callback required by fatal() -void 
netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) +void netdata_cleanup_and_exit(EXIT_REASON reason, const char *action, const char *action_result, const char *action_data) { (void)action; (void)action_result; (void)action_data; - exit(ret); + exit(reason == EXIT_REASON_FATAL ? 1 : 0); /* only a reason exactly equal to EXIT_REASON_FATAL yields a non-zero exit status */ } void rrdset_thread_rda_free(void){} diff --git a/src/libnetdata/uuid/uuid.h b/src/libnetdata/uuid/uuid.h index cabf2460403407..5e9ffc4fa25c7c 100644 --- a/src/libnetdata/uuid/uuid.h +++ b/src/libnetdata/uuid/uuid.h @@ -37,6 +37,7 @@ ND_UUID_DEFINE(log_flood_protection_msgid, 0xec, 0x87, 0xa5, 0x61, 0x20, 0xd5, 0 ND_UUID_DEFINE(netdata_startup_msgid, 0x1e, 0x60, 0x61, 0xa9, 0xfb, 0xd4, 0x45, 0x01, 0xb3, 0xcc, 0xc3, 0x68, 0x11, 0x9f, 0x2b, 0x69); ND_UUID_DEFINE(aclk_connection_msgid, 0xac, 0xb3, 0x3c, 0xb9, 0x57, 0x78, 0x47, 0x6b, 0xaa, 0xc7, 0x02, 0xeb, 0x7e, 0x4e, 0x15, 0x1d); ND_UUID_DEFINE(extreme_cardinality_msgid, 0xd1, 0xf5, 0x96, 0x06, 0xdd, 0x4d, 0x41, 0xe3, 0xb2, 0x17, 0xa0, 0xcf, 0xca, 0xe8, 0xe6, 0x32); +ND_UUID_DEFINE(netdata_exit_msgid, 0x02, 0xf4, 0x7d, 0x35, 0x0a, 0xf5, 0x44, 0x91, 0x97, 0xbf, 0x7a, 0x95, 0xb6, 0x05, 0xa4, 0x68); /* message id for the exit log entry - presumably used where the exit reason is logged; verify against the logger */ ND_UUID_DEFINE(dyncfg_user_action_msgid, 0x4f, 0xdf, 0x40, 0x81, 0x6c, 0x12, 0x46, 0x23, 0xa0, 0x32, 0xb7, 0xfe, 0x73, 0xbe, 0xac, 0xb8); ND_UUID UUID_generate_from_hash(const void *payload, size_t payload_len); diff --git a/src/libnetdata/worker_utilization/worker_utilization.c b/src/libnetdata/worker_utilization/worker_utilization.c index 3f5f4fccc76b4a..f5c1006616cec8 100644 --- a/src/libnetdata/worker_utilization/worker_utilization.c +++ b/src/libnetdata/worker_utilization/worker_utilization.c @@ -52,6 +52,8 @@ struct worker { size_t spinlocks_used; struct worker_spinlock spinlocks[WORKER_SPINLOCK_CONTENTION_FUNCTIONS]; + uint64_t memory_calls[WORKERS_MEMORY_CALL_MAX]; /* per-worker call counters, indexed by WORKERS_MEMORY_CALL */ + struct worker *next; struct worker *prev; }; @@ -61,6 +63,24 @@ struct workers_workname { // this is what we add 
to Ju struct worker *base; }; +ENUM_STR_MAP_DEFINE(WORKERS_MEMORY_CALL) = { + {WORKERS_MEMORY_CALL_LIBC_MALLOC, "malloc"}, + {WORKERS_MEMORY_CALL_LIBC_CALLOC, "calloc"}, + {WORKERS_MEMORY_CALL_LIBC_REALLOC, "realloc"}, + {WORKERS_MEMORY_CALL_LIBC_FREE, "free"}, + {WORKERS_MEMORY_CALL_LIBC_STRDUP, "strdup"}, + {WORKERS_MEMORY_CALL_LIBC_STRNDUP, "strndup"}, + {WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN, "posix_memalign"}, + {WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN_FREE, "posix_memalign_free"}, + {WORKERS_MEMORY_CALL_MMAP, "mmap"}, + {WORKERS_MEMORY_CALL_MUNMAP, "munmap"}, + + // terminator + {0, NULL}, +}; + +ENUM_STR_DEFINE_FUNCTIONS(WORKERS_MEMORY_CALL, WORKERS_MEMORY_CALL_LIBC_MALLOC, "other"); + static struct workers_globals { bool enabled; @@ -100,7 +120,7 @@ size_t workers_allocated_memory(void) { } void worker_register(const char *name) { - if(unlikely(worker || !workers_globals.enabled)) + if(likely(worker || !workers_globals.enabled)) return; worker = callocz(1, sizeof(struct worker)); @@ -140,7 +160,7 @@ void worker_register(const char *name) { } void worker_register_job_custom_metric(size_t job_id, const char *name, const char *units, WORKER_METRIC_TYPE type) { - if(unlikely(!worker)) return; + if(likely(!worker)) return; if(unlikely(job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) { netdata_log_error("WORKER_UTILIZATION: job_id %zu is too big. 
Max is %zu", job_id, (size_t)(WORKER_UTILIZATION_MAX_JOB_TYPES - 1)); @@ -166,7 +186,7 @@ void worker_register_job_name(size_t job_id, const char *name) { } void worker_unregister(void) { - if(unlikely(!worker)) return; + if(likely(!worker)) return; size_t workname_size = strlen(worker->workname) + 1; spinlock_lock(&workers_globals.spinlock); @@ -214,7 +234,7 @@ static void worker_is_idle_with_time(usec_t now) { } ALWAYS_INLINE void worker_is_idle(void) { - if(unlikely(!worker || worker->last_action != WORKER_BUSY)) return; + if(likely(!worker || worker->last_action != WORKER_BUSY)) return; worker_is_idle_with_time(worker_now_monotonic_usec()); } @@ -236,7 +256,7 @@ static void worker_is_busy_do(size_t job_id) { } ALWAYS_INLINE void worker_is_busy(size_t job_id) { - if(unlikely(!worker || job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) + if(likely(!worker || job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) return; worker_is_busy_do(job_id); @@ -257,7 +277,7 @@ static void worker_set_metric_do(size_t job_id, NETDATA_DOUBLE value) { } ALWAYS_INLINE void worker_set_metric(size_t job_id, NETDATA_DOUBLE value) { - if(unlikely(!worker || job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) + if(likely(!worker || job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) return; worker_set_metric_do(job_id, value); @@ -289,12 +309,19 @@ static void worker_spinlock_contention_do(const char *func, size_t spins) { } ALWAYS_INLINE void worker_spinlock_contention(const char *func, size_t spins) { - if(unlikely(!worker)) + if(likely(!worker)) return; worker_spinlock_contention_do(func, spins); } +ALWAYS_INLINE void workers_memory_call(WORKERS_MEMORY_CALL call) { + if(likely(!worker || call >= WORKERS_MEMORY_CALL_MAX)) + return; + + worker->memory_calls[call]++; +} + // statistics interface void workers_foreach(const char *name, void (*callback)( @@ -314,6 +341,7 @@ void workers_foreach(const char *name, void (*callback)( , const char *spinlock_functions[] , size_t *spinlock_locks , size_t *spinlock_spins + 
, uint64_t *memory_calls ) , void *data) { if(!workers_globals.enabled) @@ -354,6 +382,8 @@ void workers_foreach(const char *name, void (*callback)( size_t spinlock_locks[WORKER_SPINLOCK_CONTENTION_FUNCTIONS]; size_t spinlock_spins[WORKER_SPINLOCK_CONTENTION_FUNCTIONS]; + uint64_t memory_calls[WORKERS_MEMORY_CALL_MAX]; + size_t max_job_id = p->worker_max_job_id; for(size_t i = 0; i <= max_job_id ;i++) { per_job_type_name[i] = p->per_job_type[i].name; @@ -466,6 +496,10 @@ void workers_foreach(const char *name, void (*callback)( // ------------------------------------------------------------------------------------------------------------ + memcpy(memory_calls, p->memory_calls, sizeof(memory_calls)); + + // ------------------------------------------------------------------------------------------------------------ + callback(data , p->pid , p->tag @@ -483,6 +517,7 @@ void workers_foreach(const char *name, void (*callback)( , spinlock_functions , spinlock_locks , spinlock_spins + , memory_calls ); } diff --git a/src/libnetdata/worker_utilization/worker_utilization.h b/src/libnetdata/worker_utilization/worker_utilization.h index dd7f237e0cb129..03159aee8a5d80 100644 --- a/src/libnetdata/worker_utilization/worker_utilization.h +++ b/src/libnetdata/worker_utilization/worker_utilization.h @@ -16,6 +16,26 @@ typedef enum __attribute__((packed)) { WORKER_METRIC_INCREMENTAL_TOTAL = 4, } WORKER_METRIC_TYPE; +typedef enum { + WORKERS_MEMORY_CALL_LIBC_MALLOC = 0, + WORKERS_MEMORY_CALL_LIBC_CALLOC, + WORKERS_MEMORY_CALL_LIBC_REALLOC, + WORKERS_MEMORY_CALL_LIBC_FREE, + WORKERS_MEMORY_CALL_LIBC_STRDUP, + WORKERS_MEMORY_CALL_LIBC_STRNDUP, + WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN, + WORKERS_MEMORY_CALL_LIBC_POSIX_MEMALIGN_FREE, + WORKERS_MEMORY_CALL_MMAP, + WORKERS_MEMORY_CALL_MUNMAP, + + // terminator + WORKERS_MEMORY_CALL_MAX, +} WORKERS_MEMORY_CALL; + +ENUM_STR_DEFINE_FUNCTIONS_EXTERN(WORKERS_MEMORY_CALL); + +void workers_memory_call(WORKERS_MEMORY_CALL call); + void 
workers_utilization_enable(void); size_t workers_allocated_memory(void); void worker_register(const char *name); @@ -48,6 +68,7 @@ void workers_foreach(const char *name, void (*callback)( , const char *spinlock_functions[] , size_t *spinlock_locks , size_t *spinlock_spins + , uint64_t *memory_calls ) , void *data); diff --git a/src/ml/ml_memory.cc b/src/ml/ml_memory.cc index df087f52c8e5e1..00b5b65375a9af 100644 --- a/src/ml/ml_memory.cc +++ b/src/ml/ml_memory.cc @@ -10,6 +10,7 @@ void *operator new(size_t size) throw std::bad_alloc(); pulse_ml_memory_allocated(size); + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_MALLOC); return ptr; } @@ -20,6 +21,7 @@ void *operator new[](size_t size) throw std::bad_alloc(); pulse_ml_memory_allocated(size); + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_MALLOC); return ptr; } @@ -27,6 +29,7 @@ void operator delete(void *ptr, size_t size) noexcept { if (ptr) { pulse_ml_memory_freed(size); + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_FREE); free(ptr); } } @@ -35,6 +38,7 @@ void operator delete[](void *ptr, size_t size) noexcept { if (ptr) { pulse_ml_memory_freed(size); + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_FREE); free(ptr); } } @@ -42,6 +46,7 @@ void operator delete[](void *ptr, size_t size) noexcept void operator delete(void *ptr) noexcept { if (ptr) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_FREE); free(ptr); } } @@ -49,6 +54,7 @@ void operator delete(void *ptr) noexcept void operator delete[](void *ptr) noexcept { if (ptr) { + workers_memory_call(WORKERS_MEMORY_CALL_LIBC_FREE); free(ptr); } } diff --git a/src/registry/registry.h b/src/registry/registry.h index 9c91eba8175c88..20996222783a8a 100644 --- a/src/registry/registry.h +++ b/src/registry/registry.h @@ -55,7 +55,8 @@ // initialize the registry // should only happen when netdata starts -int registry_init(void); +void registry_init(void); +bool registry_load(void); // free all data held by the registry // should only happen when netdata exits diff --git 
a/src/registry/registry_init.c b/src/registry/registry_init.c index 1df64dbf9c0a7c..9575aea99a7f9d 100644 --- a/src/registry/registry_init.c +++ b/src/registry/registry_init.c @@ -62,7 +62,11 @@ void registry_generate_curl_urls(void) { fclose(fp); } -int registry_init(void) { +void registry_init(void) { + FUNCTION_RUN_ONCE(); + + netdata_conf_section_global(); + char filename[FILENAME_MAX + 1]; // registry enabled? @@ -70,7 +74,7 @@ int registry_init(void) { registry.enabled = inicfg_get_boolean(&netdata_config, CONFIG_SECTION_REGISTRY, "enabled", 0); } else { - netdata_log_info("Registry is disabled - use the central netdata"); + netdata_log_info("Registry is disabled"); inicfg_set_boolean(&netdata_config, CONFIG_SECTION_REGISTRY, "enabled", 0); registry.enabled = 0; } @@ -117,8 +121,6 @@ int registry_init(void) { inicfg_set_number(&netdata_config, CONFIG_SECTION_REGISTRY, "max URL name length", (long long)registry.max_name_length); } - bool use_mmap = inicfg_get_boolean(&netdata_config, CONFIG_SECTION_REGISTRY, "use mmap", false); - // initialize entries counters registry.persons_count = 0; registry.machines_count = 0; @@ -128,9 +130,12 @@ int registry_init(void) { // initialize locks netdata_mutex_init(&registry.lock); +} - // load the registry database +bool registry_load(void) { if(registry.enabled) { + bool use_mmap = inicfg_get_boolean(&netdata_config, CONFIG_SECTION_REGISTRY, "use mmap", false); + // create dictionaries registry.persons = dictionary_create(REGISTRY_DICTIONARY_OPTIONS); registry.machines = dictionary_create(REGISTRY_DICTIONARY_OPTIONS); @@ -180,12 +185,14 @@ int registry_init(void) { if(unlikely(registry_db_should_be_saved())) registry_db_save(); -// registry_db_stats(); -// registry_generate_curl_urls(); -// exit(0); + // registry_db_stats(); + // registry_generate_curl_urls(); + // exit(0); + + return true; } - return 0; + return false; } static int machine_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data 
__maybe_unused) { diff --git a/src/streaming/protocol/command-begin-set-end-init.c b/src/streaming/protocol/command-begin-set-end-init.c index 1ed2832e7fe98f..fa4a6b1ba5f54d 100644 --- a/src/streaming/protocol/command-begin-set-end-init.c +++ b/src/streaming/protocol/command-begin-set-end-init.c @@ -8,7 +8,7 @@ static BUFFER *preferred_sender_buffer(RRDHOST *host) { if(host->stream.snd.commit.receiver_tid == gettid_cached()) return sender_host_buffer(host); else - return sender_thread_buffer(host->sender); + return sender_thread_buffer(host->sender, HOST_THREAD_BUFFER_INITIAL_SIZE); } ALWAYS_INLINE RRDSET_STREAM_BUFFER stream_send_metrics_init(RRDSET *st, time_t wall_clock_time) { diff --git a/src/streaming/stream-circular-buffer.h b/src/streaming/stream-circular-buffer.h index fb23c7f060aecb..33080d5818dbe1 100644 --- a/src/streaming/stream-circular-buffer.h +++ b/src/streaming/stream-circular-buffer.h @@ -12,7 +12,8 @@ extern "C" { #define CBUFFER_INITIAL_SIZE (16 * 1024) #define CBUFFER_INITIAL_MAX_SIZE (10 * 1024 * 1024) -#define THREAD_BUFFER_INITIAL_SIZE (8192) +#define HOST_THREAD_BUFFER_INITIAL_SIZE (256 * 1024) +#define REPLICATION_THREAD_BUFFER_INITIAL_SIZE (512 * 1024) #define STREAM_CIRCULAR_BUFFER_ADAPT_TO_TIMES_MAX_SIZE 3 diff --git a/src/streaming/stream-conf.c b/src/streaming/stream-conf.c index 2ce2ff6da61074..d10e6fe0bfddcb 100644 --- a/src/streaming/stream-conf.c +++ b/src/streaming/stream-conf.c @@ -115,9 +115,7 @@ bool stream_conf_receiver_needs_dbengine(void) { } void stream_conf_load() { - static bool run = false; - if(run) return; - run = true; + FUNCTION_RUN_ONCE(); stream_conf_load_internal(); check_local_streaming_capabilities(); diff --git a/src/streaming/stream-receiver.c b/src/streaming/stream-receiver.c index 13ce7f4faa60c0..36668912d3e7db 100644 --- a/src/streaming/stream-receiver.c +++ b/src/streaming/stream-receiver.c @@ -808,8 +808,9 @@ bool stream_receiver_receive_data(struct stream_thread *sth, struct receiver_sta }; 
ND_LOG_STACK_PUSH(lgs); + size_t count = 1; // how many reads to do per host, before moving to the next host EVLOOP_STATUS status = EVLOOP_STATUS_CONTINUE; - while(status == EVLOOP_STATUS_CONTINUE) { + while(status == EVLOOP_STATUS_CONTINUE && count-- > 0) { bool removed = false; ssize_t rc = stream_receive_and_process(sth, rpt, parser, now_ut, &removed); if(unlikely(removed)) @@ -963,9 +964,9 @@ void stream_receiver_check_all_nodes_from_poll(struct stream_thread *sth, usec_t nd_poll_event_t wanted = ND_POLL_READ | (stats.bytes_outstanding ? ND_POLL_WRITE : 0); if(unlikely(rpt->thread.wanted != wanted)) { - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM RCV[%zu] '%s' [from %s]: nd_poll() wanted events mismatch.", - sth->id, rrdhost_hostname(rpt->host), rpt->remote_ip); +// nd_log(NDLS_DAEMON, NDLP_DEBUG, +// "STREAM RCV[%zu] '%s' [from %s]: nd_poll() wanted events mismatch.", +// sth->id, rrdhost_hostname(rpt->host), rpt->remote_ip); rpt->thread.wanted = wanted; if(!nd_poll_upd(sth->run.ndpl, rpt->sock.fd, rpt->thread.wanted)) diff --git a/src/streaming/stream-replication-sender.c b/src/streaming/stream-replication-sender.c index 7f93d532ead299..54ac66f2d1aa3a 100644 --- a/src/streaming/stream-replication-sender.c +++ b/src/streaming/stream-replication-sender.c @@ -622,7 +622,7 @@ bool replication_response_execute_finalize_and_send(struct replication_query *q, // we might want to optimize this by filling a temporary buffer // and copying the result to the host's buffer in order to avoid // holding the host's buffer lock for too long - BUFFER *wb = sender_thread_buffer(host->sender); + BUFFER *wb = sender_thread_buffer(host->sender, REPLICATION_THREAD_BUFFER_INITIAL_SIZE); buffer_fast_strcat(wb, PLUGINSD_KEYWORD_REPLAY_BEGIN, sizeof(PLUGINSD_KEYWORD_REPLAY_BEGIN) - 1); @@ -1863,7 +1863,7 @@ void *replication_thread_main(void *ptr) { } int replication_threads_default(void) { - int threads = netdata_conf_is_parent() ? 
(int)MIN(netdata_conf_cpus(), 6) : 1; + int threads = netdata_conf_is_parent() ? (int)MAX(netdata_conf_cpus() / 3, 4) : 1; threads = FIT_IN_RANGE(threads, 1, MAX_REPLICATION_THREADS); return threads; } diff --git a/src/streaming/stream-sender-commit.c b/src/streaming/stream-sender-commit.c index 99b2964b7fb7fa..f3a8c2d64a3f0f 100644 --- a/src/streaming/stream-sender-commit.c +++ b/src/streaming/stream-sender-commit.c @@ -24,7 +24,7 @@ void sender_host_buffer_free(RRDHOST *host) { } // Collector thread starting a transmission -BUFFER *sender_commit_start_with_trace(struct sender_state *s, struct sender_buffer *commit, const char *func) { +static BUFFER *sender_commit_start_with_trace(struct sender_state *s, struct sender_buffer *commit, size_t default_size, const char *func) { if(unlikely(commit->used)) fatal("STREAM SND '%s' [to %s]: thread buffer is used multiple times concurrently (%u). " "It is already being used by '%s()', and now is called by '%s()'", @@ -39,14 +39,14 @@ BUFFER *sender_commit_start_with_trace(struct sender_state *s, struct sender_buf commit->receiver_tid, gettid_cached(), func ? 
func : "(null)"); if(unlikely(commit->wb && - commit->wb->size > THREAD_BUFFER_INITIAL_SIZE && + commit->wb->size > default_size && commit->our_recreates != commit->sender_recreates)) { buffer_free(commit->wb); commit->wb = NULL; } if(unlikely(!commit->wb)) { - commit->wb = buffer_create(THREAD_BUFFER_INITIAL_SIZE, &netdata_buffers_statistics.buffers_streaming); + commit->wb = buffer_create(default_size, &netdata_buffers_statistics.buffers_streaming); commit->our_recreates = commit->sender_recreates; } @@ -58,12 +58,12 @@ BUFFER *sender_commit_start_with_trace(struct sender_state *s, struct sender_buf return commit->wb; } -BUFFER *sender_thread_buffer_with_trace(struct sender_state *s, const char *func) { - return sender_commit_start_with_trace(s, &commit___thread, func); +BUFFER *sender_thread_buffer_with_trace(struct sender_state *s, size_t default_size, const char *func) { + return sender_commit_start_with_trace(s, &commit___thread, default_size, func); } BUFFER *sender_host_buffer_with_trace(struct rrdhost *host, const char *func) { - return sender_commit_start_with_trace(host->sender, &host->stream.snd.commit, func); + return sender_commit_start_with_trace(host->sender, &host->stream.snd.commit, HOST_THREAD_BUFFER_INITIAL_SIZE, func); } // Collector thread finishing a transmission diff --git a/src/streaming/stream-sender-commit.h b/src/streaming/stream-sender-commit.h index d26be121cd84c2..269a7e1e4dad86 100644 --- a/src/streaming/stream-sender-commit.h +++ b/src/streaming/stream-sender-commit.h @@ -29,8 +29,8 @@ void sender_host_buffer_free(struct rrdhost *host); // get the thread buffer // this is the preferred buffer for dedicated workers sending a lot of messages (like replication) // these threads need to maintain enough allocation for repeated use of the buffer -BUFFER *sender_thread_buffer_with_trace(struct sender_state *s, const char *func); -#define sender_thread_buffer(s) sender_thread_buffer_with_trace(s, __FUNCTION__) +BUFFER 
*sender_thread_buffer_with_trace(struct sender_state *s, size_t default_size, const char *func); +#define sender_thread_buffer(s, default_size) sender_thread_buffer_with_trace(s, default_size, __FUNCTION__) // get the global host buffer // this is the preferred buffer for stream threads (unified receiver / sender threads) diff --git a/src/streaming/stream-sender.c b/src/streaming/stream-sender.c index 24f80e26648bec..e54d44bb1643de 100644 --- a/src/streaming/stream-sender.c +++ b/src/streaming/stream-sender.c @@ -521,9 +521,9 @@ void stream_sender_check_all_nodes_from_poll(struct stream_thread *sth, usec_t n nd_poll_event_t wanted = ND_POLL_READ | (stats.bytes_outstanding ? ND_POLL_WRITE : 0); if(unlikely(s->thread.wanted != wanted)) { - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM SND[%zu] '%s' [to %s]: nd_poll() wanted events mismatch.", - sth->id, rrdhost_hostname(s->host), s->remote_ip); +// nd_log(NDLS_DAEMON, NDLP_DEBUG, +// "STREAM SND[%zu] '%s' [to %s]: nd_poll() wanted events mismatch.", +// sth->id, rrdhost_hostname(s->host), s->remote_ip); s->thread.wanted = wanted; if(!nd_poll_upd(sth->run.ndpl, s->sock.fd, s->thread.wanted)) diff --git a/src/web/api/functions/function-bearer_get_token.c b/src/web/api/functions/function-bearer_get_token.c index 8f14e68aed6860..d95e6be0eb5ea6 100644 --- a/src/web/api/functions/function-bearer_get_token.c +++ b/src/web/api/functions/function-bearer_get_token.c @@ -13,7 +13,8 @@ struct bearer_token_request { STRING *client_name; }; -static bool bearer_parse_json_payload(json_object *jobj, const char *path, void *data, BUFFER *error) { +static bool bearer_parse_json_payload(json_object *jobj, void *data, BUFFER *error) { + const char *path = ""; struct bearer_token_request *rq = data; JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "claim_id", rq->claim_id, error, true); JSONC_PARSE_TXT2UUID_OR_ERROR_AND_RETURN(jobj, path, "machine_guid", rq->machine_guid, error, true); diff --git a/src/web/server/web_client.c 
b/src/web/server/web_client.c index d8ff80cc9dd1f6..d111ceef7e84bb 100644 --- a/src/web/server/web_client.c +++ b/src/web/server/web_client.c @@ -1185,13 +1185,13 @@ static inline int web_client_process_url(RRDHOST *host, struct web_client *w, ch w->response.data->content_type = CT_TEXT_PLAIN; buffer_flush(w->response.data); - if(!netdata_exit) + if(!exit_initiated) buffer_strcat(w->response.data, "ok, will do..."); else buffer_strcat(w->response.data, "I am doing it already"); netdata_log_error("web request to exit received."); - netdata_cleanup_and_exit(0, NULL, NULL, NULL); + netdata_cleanup_and_exit(EXIT_REASON_API_QUIT, NULL, NULL, NULL); return HTTP_RESP_OK; } else if(unlikely(hash == hash_debug && strcmp(tok, "debug") == 0)) { diff --git a/src/web/server/web_client.h b/src/web/server/web_client.h index 63e3adaa7a4903..9ff3e9d93d851f 100644 --- a/src/web/server/web_client.h +++ b/src/web/server/web_client.h @@ -131,7 +131,7 @@ void web_client_set_conn_webrtc(struct web_client *w); #define NETDATA_WEB_RESPONSE_HEADER_INITIAL_SIZE 4096 #define NETDATA_WEB_RESPONSE_INITIAL_SIZE 8192 #define NETDATA_WEB_REQUEST_INITIAL_SIZE 8192 -#define NETDATA_WEB_REQUEST_MAX_SIZE 65536 +#define NETDATA_WEB_REQUEST_MAX_SIZE (128 * 1024) #define NETDATA_WEB_DECODED_URL_INITIAL_SIZE 512 #define CLOUD_CLIENT_NAME_LENGTH 64