Skip to content

Commit

Permalink
Bump file descriptor rlimit to hard rlimit by default
Browse files Browse the repository at this point in the history
The default soft limit for the number of open file descriptors per-process in most Linux systems is 1024. This results in crashes on most HPC systems I've used recently as even simple lo2s invocations will exceed this limit with all the per-core perf_event_open calls.

This microscopic soft limit is in place because select() only supports file descriptors below 1024 (FD_SETSIZE). If your code does not use select(), it is safe to raise the file descriptor limit from the soft limit to the hard limit.
  • Loading branch information
cvonelm committed Jan 2, 2024
1 parent b4d5f99 commit b9b725a
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 0 deletions.
4 changes: 4 additions & 0 deletions include/lo2s/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

extern "C"
{
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
Expand Down Expand Up @@ -111,6 +112,9 @@ void try_pin_to_scope(ExecutionScope scope);

int get_cgroup_mountpoint_fd(std::string cgroup);

// Raises the soft limit for open file descriptors (RLIMIT_NOFILE) of the calling
// process to its hard limit.
void bump_rlimit_fd();
// Returns the RLIMIT_NOFILE limits of the calling process. The value is cached in a
// function-local static once it has been queried, so calling this before
// bump_rlimit_fd() keeps the original limits available for later use.
struct rlimit save_rlimit_fd();

Thread gettid();

std::set<std::uint32_t> parse_list(std::string list);
Expand Down
9 changes: 9 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,20 @@
#include <lo2s/monitor/process_monitor.hpp>
#include <lo2s/monitor/process_monitor_main.hpp>
#include <lo2s/summary.hpp>
#include <lo2s/util.hpp>

#include <system_error>

int main(int argc, const char** argv)
{
// The soft resource limit for file descriptors (which lo2s uses a lot of, especially in
// system-monitoring mode) is artificially low to cope with the ancient select() system call.
// We do not use select(), so we can safely bump the limit. However, the command we run under
// lo2s might use it, and resource limits are preserved across fork(), so save the original
// limit here so that we can restore it later.
lo2s::save_rlimit_fd();
lo2s::bump_rlimit_fd();

try
{
lo2s::parse_program_options(argc, argv);
Expand Down
4 changes: 4 additions & 0 deletions src/monitor/process_monitor_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ namespace monitor

[[noreturn]] static void run_command(const std::vector<std::string>& command_and_args)
{
struct rlimit saved_rlimit = save_rlimit_fd();
setrlimit(RLIMIT_OFILE, &saved_rlimit);

/* kill yourself if the parent dies */
prctl(PR_SET_PDEATHSIG, SIGHUP);

Expand Down Expand Up @@ -109,6 +112,7 @@ void process_monitor_main(AbstractProcessMonitor& monitor)
throw_errno();
}
}

if (process == Process::invalid())
{
Log::error() << "Fork failed.";
Expand Down
20 changes: 20 additions & 0 deletions src/util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,4 +327,24 @@ std::set<std::uint32_t> parse_list_from_file(std::filesystem::path file)

return std::set<std::uint32_t>();
}

// Returns the RLIMIT_NOFILE limits as they were the first time this function
// successfully queried them.
//
// The limits are cached in function-local statics, so later calls keep returning
// the originally observed values even after the soft limit has been changed. This
// is what allows the original limit to be handed back to a child process.
//
// An explicit flag marks the cache as filled. The limit values themselves cannot be
// used as a sentinel because a soft limit of 0 is legal; treating it as "not cached
// yet" would re-query the limits and return the already modified ones.
//
// If getrlimit() fails, a zero-initialized rlimit is returned, nothing is cached,
// and the next call tries again.
struct rlimit save_rlimit_fd()
{
    static struct rlimit saved = {};
    static bool captured = false;

    if (!captured)
    {
        struct rlimit now = {};
        if (getrlimit(RLIMIT_NOFILE, &now) == 0)
        {
            saved = now;
            captured = true;
        }
    }
    return saved;
}

// Raises the soft limit for open file descriptors (RLIMIT_NOFILE) of the calling
// process to the hard limit.
//
// This is best effort: if the current limits cannot be read, or the kernel refuses
// the new value, the existing soft limit simply stays in effect.
void bump_rlimit_fd()
{
    struct rlimit limits = {};
    if (getrlimit(RLIMIT_NOFILE, &limits) != 0)
    {
        // Without valid current limits there is nothing sensible to pass to
        // setrlimit(); never hand it an unpopulated struct.
        return;
    }

    if (limits.rlim_cur == limits.rlim_max)
    {
        // Already at the hard limit, nothing to do.
        return;
    }

    limits.rlim_cur = limits.rlim_max;
    // Failure is tolerated on purpose, see the function comment.
    setrlimit(RLIMIT_NOFILE, &limits);
}
} // namespace lo2s

0 comments on commit b9b725a

Please sign in to comment.