Skip to content

Commit

Permalink
feat: change from node_death_monitor from process_alive_monitor
Browse files Browse the repository at this point in the history
Signed-off-by: Kyoichi Sugahara <kyoichi.sugahara@tier4.jp>
  • Loading branch information
kyoichi-sugahara committed Feb 10, 2025
1 parent 8ef11c7 commit 572427d
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 41 deletions.

This file was deleted.

Empty file.
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
cmake_minimum_required(VERSION 3.14)
project(autoware_node_death_monitor)
project(autoware_process_alive_monitor)

find_package(autoware_cmake REQUIRED)
autoware_package()

ament_auto_add_library(${PROJECT_NAME} SHARED
src/autoware_node_death_monitor.cpp
src/autoware_process_alive_monitor.cpp
)

rclcpp_components_register_node(${PROJECT_NAME}
PLUGIN "autoware::node_death_monitor::NodeDeathMonitor"
PLUGIN "autoware::process_alive_monitor::ProcessAliveMonitor"
EXECUTABLE ${PROJECT_NAME}_node)

ament_auto_package(INSTALL_TO_SHARE
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# autoware_node_death_monitor
# autoware_process_alive_monitor

This package provides a monitoring node that detects ROS 2 node crashes by analyzing `launch.log` files, rather than subscribing to `/rosout` logs.

---

## Overview

- **Node name**: `autoware_node_death_monitor`
- **Node name**: `autoware_process_alive_monitor`
- **Monitored file**: `launch.log`
- **Detected event**: Looks for lines containing the substring `"process has died"` and extracts the node name and exit code.

Expand All @@ -16,7 +16,7 @@ When a crash or unexpected shutdown occurs, `ros2 launch` typically outputs a li
[ERROR] [node_name-1]: process has died [pid 12345, exit code 139, cmd '...']
```

The `autoware_node_death_monitor` node continuously reads the latest `launch.log` file, detects these messages, and logs a warning or marks the node as "dead."
The `autoware_process_alive_monitor` node continuously reads the latest `launch.log` file, detects these messages, and logs a warning or marks the node as "dead."

---

Expand Down Expand Up @@ -48,10 +48,10 @@ The `autoware_node_death_monitor` node continuously reads the latest `launch.log
| `check_interval` | `double` | `1.0` | Timer interval (seconds) for scanning the log file. |
| `enable_debug` | `bool` | `false` | Enables debug logging for detailed output. |

Example **`autoware_node_death_monitor.param.yaml`**:
Example **`autoware_process_alive_monitor.param.yaml`**:

```yaml
autoware_node_death_monitor:
autoware_process_alive_monitor:
ros__parameters:
ignore_node_names:
- rviz2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef AUTOWARE_NODE_DEATH_MONITOR__AUTOWARE_NODE_DEATH_MONITOR_HPP_
#define AUTOWARE_NODE_DEATH_MONITOR__AUTOWARE_NODE_DEATH_MONITOR_HPP_
#ifndef AUTOWARE_PROCESS_ALIVE_MONITOR__AUTOWARE_PROCESS_ALIVE_MONITOR_HPP_
#define AUTOWARE_PROCESS_ALIVE_MONITOR__AUTOWARE_PROCESS_ALIVE_MONITOR_HPP_

#include "rclcpp/rclcpp.hpp"

Expand All @@ -22,17 +22,17 @@
#include <unordered_map>
#include <vector>

namespace autoware::node_death_monitor
namespace autoware::process_alive_monitor
{

class NodeDeathMonitor : public rclcpp::Node
class ProcessAliveMonitor : public rclcpp::Node
{
public:
/**
* @brief Constructor for NodeDeathMonitor
* @brief Constructor for ProcessAliveMonitor
* @param options Node options for configuration
*/
explicit NodeDeathMonitor(const rclcpp::NodeOptions & options);
explicit ProcessAliveMonitor(const rclcpp::NodeOptions & options);

private:
/**
Expand Down Expand Up @@ -67,6 +67,6 @@ class NodeDeathMonitor : public rclcpp::Node
bool enable_debug_{false}; // Enable debug output
};

} // namespace autoware::node_death_monitor
} // namespace autoware::process_alive_monitor

#endif // AUTOWARE_NODE_DEATH_MONITOR__AUTOWARE_NODE_DEATH_MONITOR_HPP_
#endif // AUTOWARE_PROCESS_ALIVE_MONITOR__AUTOWARE_PROCESS_ALIVE_MONITOR_HPP_
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<launch>
<!-- Parameter -->
<arg name="config_file" default="$(find-pkg-share autoware_process_alive_monitor)/config/autoware_process_alive_monitor.param.yaml"/>

<!-- Set log level -->
<arg name="log_level" default="info"/>

<node pkg="autoware_process_alive_monitor" exec="autoware_process_alive_monitor_node" name="process_alive_monitor" output="screen" args="--ros-args --log-level $(var log_level)">
<!-- Parameter -->
<param from="$(var config_file)"/>
</node>
</launch>
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
<?xml version="1.0"?>
<package format="3">
<name>autoware_node_death_monitor</name>
<name>autoware_process_alive_monitor</name>
<version>0.0.1</version>
<description>The node_death_monitor package</description>
<description>The process_alive_monitor package</description>

<maintainer email="kyoichi.sugahara@tier4.jp">Kyoichi Sugahara</maintainer>
<license>Apache License 2.0</license>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "autoware_node_death_monitor/autoware_node_death_monitor.hpp"
#include "autoware_process_alive_monitor/autoware_process_alive_monitor.hpp"

#include <filesystem>
#include <fstream>
Expand All @@ -22,7 +22,7 @@

namespace fs = std::filesystem;

namespace autoware::node_death_monitor
namespace autoware::process_alive_monitor
{

/**
Expand Down Expand Up @@ -76,8 +76,8 @@ static fs::path find_latest_launch_log()
return fs::path();
}

NodeDeathMonitor::NodeDeathMonitor(const rclcpp::NodeOptions & options)
: Node("autoware_node_death_monitor", options)
ProcessAliveMonitor::ProcessAliveMonitor(const rclcpp::NodeOptions & options)
: Node("autoware_process_alive_monitor", options)
{
ignore_node_names_ =
declare_parameter<std::vector<std::string>>("ignore_node_names", std::vector<std::string>{});
Expand Down Expand Up @@ -109,10 +109,10 @@ NodeDeathMonitor::NodeDeathMonitor(const rclcpp::NodeOptions & options)

auto interval_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::duration<double>(check_interval_));
timer_ = create_wall_timer(interval_ns, std::bind(&NodeDeathMonitor::on_timer, this));
timer_ = create_wall_timer(interval_ns, std::bind(&ProcessAliveMonitor::on_timer, this));
}

void NodeDeathMonitor::read_launch_log_diff()
void ProcessAliveMonitor::read_launch_log_diff()
{
if (launch_log_path_.empty()) {
return;
Expand Down Expand Up @@ -150,7 +150,7 @@ void NodeDeathMonitor::read_launch_log_diff()
std::streampos last_valid_pos = static_cast<std::streampos>(last_file_pos_);

size_t iteration = 0;
while (true) {
while (rclcpp::ok) {
// Check current position
std::streampos current_pos_start = ifs.tellg();
if (current_pos_start == std::streampos(-1)) {
Expand Down Expand Up @@ -200,7 +200,7 @@ void NodeDeathMonitor::read_launch_log_diff()
}
}

void NodeDeathMonitor::parse_log_line(const std::string & line)
void ProcessAliveMonitor::parse_log_line(const std::string & line)
{
const std::string target_str = "process has died";
if (line.find(target_str) == std::string::npos) {
Expand Down Expand Up @@ -286,7 +286,7 @@ void NodeDeathMonitor::parse_log_line(const std::string & line)
}
}

void NodeDeathMonitor::on_timer()
void ProcessAliveMonitor::on_timer()
{
read_launch_log_diff();

Expand All @@ -304,7 +304,7 @@ void NodeDeathMonitor::on_timer()
}
}

} // namespace autoware::node_death_monitor
} // namespace autoware::process_alive_monitor

#include <rclcpp_components/register_node_macro.hpp>
RCLCPP_COMPONENTS_REGISTER_NODE(autoware::node_death_monitor::NodeDeathMonitor)
RCLCPP_COMPONENTS_REGISTER_NODE(autoware::process_alive_monitor::ProcessAliveMonitor)

0 comments on commit 572427d

Please sign in to comment.