Skip to content

Commit 0a3e6d8

Browse files
author
Mehmet Emin BAŞOĞLU
authored
feat(system): create a package to monitor component containers (autowarefoundation#7094)
Signed-off-by: Mehmet Emin BAŞOĞLU <memin@leodrive.ai>
1 parent d571fc4 commit 0a3e6d8

10 files changed

+586
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
cmake_minimum_required(VERSION 3.8)
2+
project(autoware_component_monitor)
3+
4+
find_package(autoware_cmake REQUIRED)
5+
autoware_package()
6+
7+
find_package(Boost REQUIRED COMPONENTS
8+
filesystem
9+
)
10+
11+
ament_auto_add_library(${PROJECT_NAME} SHARED
12+
src/component_monitor_node.cpp
13+
)
14+
target_link_libraries(${PROJECT_NAME} ${Boost_LIBRARIES})
15+
16+
rclcpp_components_register_node(${PROJECT_NAME}
17+
PLUGIN "autoware::component_monitor::ComponentMonitor"
18+
EXECUTABLE ${PROJECT_NAME}_node
19+
)
20+
21+
if(BUILD_TESTING)
22+
ament_add_ros_isolated_gtest(test_unit_conversions test/test_unit_conversions.cpp)
23+
target_link_libraries(test_unit_conversions ${PROJECT_NAME})
24+
target_include_directories(test_unit_conversions PRIVATE src)
25+
endif()
26+
27+
ament_auto_package(
28+
INSTALL_TO_SHARE
29+
config
30+
launch
31+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# autoware_component_monitor
2+
3+
The `autoware_component_monitor` package allows monitoring system usage of component containers.
4+
The composable node inside the package is attached to a component container, and it publishes CPU and memory usage of
5+
the container.
6+
7+
## Inputs / Outputs
8+
9+
### Input
10+
11+
None.
12+
13+
### Output
14+
15+
| Name | Type | Description |
16+
| -------------------------- | -------------------------------------------------- | ---------------------- |
17+
| `~/component_system_usage` | `autoware_internal_msgs::msg::ResourceUsageReport` | CPU, Memory usage etc. |
18+
19+
## Parameters
20+
21+
### Core Parameters
22+
23+
{{ json_to_markdown("system/autoware_component_monitor/schema/component_monitor.schema.json") }}
24+
25+
## How to use
26+
27+
Add it as a composable node in your launch file:
28+
29+
```xml
30+
31+
<launch>
32+
<group>
33+
<push-ros-namespace namespace="your_namespace"/>
34+
...
35+
36+
<load_composable_node target="$(var container_name)">
37+
<composable_node pkg="autoware_component_monitor"
38+
plugin="autoware::component_monitor::ComponentMonitor"
39+
name="component_monitor">
40+
<param from="$(find-pkg-share autoware_component_monitor)/config/component_monitor.param.yaml"/>
41+
</composable_node>
42+
</load_composable_node>
43+
44+
...
45+
</group>
46+
</launch>
47+
```
48+
49+
### Quick testing
50+
51+
You can test the package by running the following command:
52+
53+
```bash
54+
ros2 component load <container_name> autoware_component_monitor autoware::component_monitor::ComponentMonitor -p publish_rate:=10.0 --node-namespace <namespace>
55+
56+
# Example usage
57+
ros2 component load /pointcloud_container autoware_component_monitor autoware::component_monitor::ComponentMonitor -p publish_rate:=10.0 --node-namespace /pointcloud_container
58+
```
59+
60+
## How it works
61+
62+
The package uses the `top` command under the hood.
63+
The `top -b -n 1 -E k -p PID` command is run at the configured `publish_rate` (5 Hz by default) to get the system usage of the process.
64+
65+
- `-b` activates the batch mode. By default, `top` doesn't exit and prints to stdout periodically. Batch mode allows
66+
exiting the program.
67+
- `-n` sets the number of times `top` prints the system usage in batch mode.
68+
- `-p` specifies the PID of the process to monitor.
69+
- `-E k` changes the memory unit in the summary section to KiB.
70+
71+
Here is a sample output:
72+
73+
```text
74+
top - 13:57:26 up 3:14, 1 user, load average: 1,09, 1,10, 1,04
75+
Tasks: 1 total, 0 running, 1 sleeping, 0 stopped, 0 zombie
76+
%Cpu(s): 0,0 us, 0,8 sy, 0,0 ni, 99,2 id, 0,0 wa, 0,0 hi, 0,0 si, 0,0 st
77+
KiB Mem : 65532208 total, 35117428 free, 17669824 used, 12744956 buff/cache
78+
KiB Swap: 39062524 total, 39062524 free, 0 used. 45520816 avail Mem
79+
80+
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
81+
3352 meb 20 0 2905940 1,2g 39292 S 0,0 2,0 23:24.01 awesome
82+
```
83+
84+
We read the fields at zero-based indices 5 and 8 of the last line, which are RES and %CPU respectively.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/**:
2+
ros__parameters:
3+
publish_rate: 5.0 # Hz
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<launch>
2+
<arg name="param_file" default="$(find-pkg-share autoware_component_monitor)/config/component_monitor.param.yaml"/>
3+
4+
<node_container pkg="rclcpp_components" exec="component_container_mt" name="component_monitor_container" namespace="/">
5+
<composable_node pkg="autoware_component_monitor" plugin="autoware::component_monitor::ComponentMonitor" name="component_monitor">
6+
<param from="$(var param_file)"/>
7+
</composable_node>
8+
</node_container>
9+
</launch>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?xml version="1.0"?>
2+
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
3+
<package format="3">
4+
<name>autoware_component_monitor</name>
5+
<version>0.0.0</version>
6+
<description>A ROS 2 package to monitor system usage of component containers.</description>
7+
<maintainer email="memin@leodrive.ai">Mehmet Emin Başoğlu</maintainer>
8+
<license>Apache-2.0</license>
9+
10+
<buildtool_depend>ament_cmake_auto</buildtool_depend>
11+
<buildtool_depend>autoware_cmake</buildtool_depend>
12+
13+
<depend>autoware_internal_msgs</depend>
14+
<depend>libboost-filesystem-dev</depend>
15+
<depend>rclcpp</depend>
16+
<depend>rclcpp_components</depend>
17+
18+
<test_depend>ament_cmake_ros</test_depend>
19+
<test_depend>ament_lint_auto</test_depend>
20+
<test_depend>autoware_lint_common</test_depend>
21+
22+
<export>
23+
<build_type>ament_cmake</build_type>
24+
</export>
25+
</package>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema#",
3+
"title": "Parameters for the Component Monitor node",
4+
"type": "object",
5+
"definitions": {
6+
"component_monitor": {
7+
"type": "object",
8+
"properties": {
9+
"publish_rate": {
10+
"type": "number",
11+
"default": "5.0",
12+
"description": "Publish rate in Hz"
13+
}
14+
},
15+
"required": ["publish_rate"]
16+
}
17+
},
18+
"properties": {
19+
"/**": {
20+
"type": "object",
21+
"properties": {
22+
"ros__parameters": {
23+
"$ref": "#/definitions/component_monitor"
24+
}
25+
},
26+
"required": ["ros__parameters"]
27+
}
28+
},
29+
"required": ["/**"]
30+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
// Copyright 2024 The Autoware Foundation
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "component_monitor_node.hpp"

#include "unit_conversions.hpp"

#include <rclcpp/rclcpp.hpp>

#include <autoware_internal_msgs/msg/resource_usage_report.hpp>

#include <boost/process.hpp>

#include <cctype>
#include <cstdint>
#include <exception>
#include <functional>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
34+
35+
namespace autoware::component_monitor
36+
{
37+
// Builds the monitor node: creates the usage publisher, checks that `top` is reachable,
// prepares the environment used for child processes and starts the periodic sampling timer.
//
// node_options: options forwarded to the rclcpp::Node base class.
ComponentMonitor::ComponentMonitor(const rclcpp::NodeOptions & node_options)
: Node("component_monitor", node_options), publish_rate_(declare_parameter<double>("publish_rate"))
{
  usage_pub_ =
    create_publisher<ResourceUsageReport>("~/component_system_usage", rclcpp::SensorDataQoS());

  // Make sure top is installed and is in path
  // NOTE(review): construction continues after rclcpp::shutdown(); the timer below is still
  // created. Confirm that this is intended.
  if (boost::process::search_path("top").empty()) {
    RCLCPP_ERROR_STREAM(get_logger(), "Couldn't find 'top' in path.");
    rclcpp::shutdown();
  }

  // The PID of the current process is the one that gets monitored
  const int own_pid = getpid();

  // Environment handed to the spawned command: a copy of ours with LC_NUMERIC overridden
  // (presumably so that `top` prints '.' as the decimal separator - TODO confirm).
  environment_ = boost::this_process::environment();
  environment_["LC_NUMERIC"] = "en_US.UTF-8";

  on_timer_tick_wrapped_ = std::bind(&ComponentMonitor::on_timer_tick, this, own_pid);

  const auto tick_period = rclcpp::Rate(publish_rate_).period();
  timer_ = rclcpp::create_timer(this, get_clock(), tick_period, on_timer_tick_wrapped_);
}
61+
62+
void ComponentMonitor::on_timer_tick(const int pid) const
63+
{
64+
if (usage_pub_->get_subscription_count() == 0) return;
65+
66+
try {
67+
auto usage_msg = pid_to_report(pid);
68+
usage_msg.header.stamp = this->now();
69+
usage_msg.pid = pid;
70+
usage_pub_->publish(usage_msg);
71+
} catch (std::exception & e) {
72+
RCLCPP_ERROR(get_logger(), "%s", e.what());
73+
} catch (...) {
74+
RCLCPP_ERROR(get_logger(), "An unknown error occurred.");
75+
}
76+
}
77+
78+
// Runs `top` once for the given process and converts its output into a usage report.
//
// pid: process id passed to `top -p`.
// Returns a report with CPU cores utilized, total/free system memory and the resident
// memory of the process filled in (header and pid are left to the caller).
// Throws std::runtime_error when `top` printed fewer lines than expected, and whatever
// std::vector::at / std::stof / std::stoul / parse_memory_res throw on malformed fields.
ComponentMonitor::ResourceUsageReport ComponentMonitor::pid_to_report(const pid_t & pid) const
{
  const auto std_out = run_system_command("top -b -n 1 -E k -p " + std::to_string(pid));

  const auto fields = parse_lines_into_words(std_out);

  // The memory summary is read from the 4th line (index 3) and the process row from the last
  // line. Without this guard, `fields.back()` on an empty result (e.g. when `top` failed and
  // printed nothing) would be undefined behavior.
  constexpr std::size_t min_line_count = 4;
  if (fields.size() < min_line_count) {
    throw std::runtime_error(
      "Unexpected output from 'top': got " + std::to_string(fields.size()) + " line(s)");
  }

  // Process row columns: PID USER PR NI VIRT RES SHR S %CPU ... -> RES is 5, %CPU is 8.
  const auto & process_row = fields.back();
  // Memory summary row: "KiB Mem : <total> total, <free> free, ..." -> total is 3, free is 5.
  const auto & memory_row = fields.at(3);

  ResourceUsageReport report;
  // %CPU is a percentage of a single core; dividing by 100 yields the number of cores used.
  report.cpu_cores_utilized = std::stof(process_row.at(8)) / 100.0f;
  report.total_memory_bytes = unit_conversions::kib_to_bytes(std::stoul(memory_row.at(3)));
  report.free_memory_bytes = unit_conversions::kib_to_bytes(std::stoul(memory_row.at(5)));
  report.process_memory_bytes = parse_memory_res(process_row.at(5));

  return report;
}
92+
93+
std::stringstream ComponentMonitor::run_system_command(const std::string & cmd) const
94+
{
95+
int out_fd[2];
96+
if (pipe2(out_fd, O_CLOEXEC) != 0) {
97+
RCLCPP_ERROR_STREAM(get_logger(), "Error setting flags on out_fd");
98+
}
99+
boost::process::pipe out_pipe{out_fd[0], out_fd[1]};
100+
boost::process::ipstream is_out{std::move(out_pipe)};
101+
102+
int err_fd[2];
103+
if (pipe2(err_fd, O_CLOEXEC) != 0) {
104+
RCLCPP_ERROR_STREAM(get_logger(), "Error setting flags on err_fd");
105+
}
106+
boost::process::pipe err_pipe{err_fd[0], err_fd[1]};
107+
boost::process::ipstream is_err{std::move(err_pipe)};
108+
109+
boost::process::child c(
110+
cmd, environment_, boost::process::std_out > is_out, boost::process::std_err > is_err);
111+
c.wait();
112+
113+
if (c.exit_code() != 0) {
114+
std::ostringstream os;
115+
is_err >> os.rdbuf();
116+
RCLCPP_ERROR_STREAM(get_logger(), "Error running command: " << os.str());
117+
}
118+
119+
std::stringstream sstream;
120+
sstream << is_out.rdbuf();
121+
return sstream;
122+
}
123+
124+
// Splits the given text into lines and every line into whitespace-separated words.
//
// std_out: stream whose buffered content is parsed (the stream itself is not consumed).
// Returns one vector of words per input line; a blank line yields an empty vector.
ComponentMonitor::VecVecStr ComponentMonitor::parse_lines_into_words(
  const std::stringstream & std_out)
{
  VecVecStr rows;
  // Work on a copy of the text so the caller's stream position stays untouched.
  std::istringstream text{std_out.str()};

  for (std::string raw_line; std::getline(text, raw_line);) {
    std::istringstream tokenizer{raw_line};
    std::vector<std::string> tokens;

    for (std::string token; tokenizer >> token;) {
      tokens.push_back(token);
    }

    rows.push_back(std::move(tokens));
  }

  return rows;
}
145+
146+
std::uint64_t ComponentMonitor::parse_memory_res(const std::string & mem_res)
147+
{
148+
// example 1: 12.3g
149+
// example 2: 123 (without suffix, just bytes)
150+
static const std::unordered_map<char, std::function<std::uint64_t(double)>> unit_map{
151+
{'k', unit_conversions::kib_to_bytes<double>}, {'m', unit_conversions::mib_to_bytes<double>},
152+
{'g', unit_conversions::gib_to_bytes<double>}, {'t', unit_conversions::tib_to_bytes<double>},
153+
{'p', unit_conversions::pib_to_bytes<double>}, {'e', unit_conversions::eib_to_bytes<double>}};
154+
155+
if (std::isdigit(mem_res.back())) {
156+
return std::stoull(mem_res); // Handle plain bytes without any suffix
157+
}
158+
159+
// Extract the numeric part and the unit suffix
160+
double value = std::stod(mem_res.substr(0, mem_res.size() - 1));
161+
char suffix = mem_res.back();
162+
163+
// Find the appropriate function from the map
164+
auto it = unit_map.find(suffix);
165+
if (it != unit_map.end()) {
166+
const auto & conversion_function = it->second;
167+
return conversion_function(value);
168+
}
169+
170+
// Throw an exception or handle the error as needed if the suffix is not recognized
171+
throw std::runtime_error("Unsupported unit suffix: " + std::string(1, suffix));
172+
}
173+
174+
} // namespace autoware::component_monitor
175+
176+
#include <rclcpp_components/register_node_macro.hpp>
177+
RCLCPP_COMPONENTS_REGISTER_NODE(autoware::component_monitor::ComponentMonitor)

0 commit comments

Comments
 (0)