Skip to content

Commit

Permalink
feat(autoware_probabilistic_occupancy_grid_map): improved data handli…
Browse files Browse the repository at this point in the history
…ng on the ogm (autowarefoundation#10060)

feat: improved data handling in the ogm. When using the full concatenated pointcloud the processing time decreases from 8ms to 4ms

Signed-off-by: Kenzo Lobos-Tsunekawa <kenzo.lobos@tier4.jp>
  • Loading branch information
knzo25 authored Feb 10, 2025
1 parent b59cd8d commit c08e6b3
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 77 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D

bool isCudaEnabled() const;

void setCudaStream(const cudaStream_t & stream);

const autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> & getDeviceCostmap() const;

void copyDeviceCostmapToHost() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,37 @@
class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2
{
public:
void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2 & msg)
void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_ptr)
{
// cSpell: ignore knzo25
// NOTE(knzo25): replace this with the cuda blackboard later
header = msg.header;
fields = msg.fields;
height = msg.height;
width = msg.width;
is_bigendian = msg.is_bigendian;
point_step = msg.point_step;
row_step = msg.row_step;
is_dense = msg.is_dense;

if (!data || capacity_ < msg.data.size()) {
capacity_ = 1024 * (msg.data.size() / 1024 + 1);
cudaStreamSynchronize(stream);
internal_msg_ = msg_ptr;

header = msg_ptr->header;
fields = msg_ptr->fields;
height = msg_ptr->height;
width = msg_ptr->width;
is_bigendian = msg_ptr->is_bigendian;
point_step = msg_ptr->point_step;
row_step = msg_ptr->row_step;
is_dense = msg_ptr->is_dense;

if (!data || capacity_ < msg_ptr->data.size()) {
const int factor = 4096 * point_step;
capacity_ = factor * (msg_ptr->data.size() / factor + 1);
data = autoware::cuda_utils::make_unique<std::uint8_t[]>(capacity_);
}

cudaMemcpyAsync(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(
data.get(), msg_ptr->data.data(), msg_ptr->data.size(), cudaMemcpyHostToDevice, stream);
}

autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> data;
cudaStream_t stream;

private:
sensor_msgs::msg::PointCloud2::ConstSharedPtr internal_msg_;
std::size_t capacity_{0};
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ OccupancyGridMapInterface::OccupancyGridMapInterface(
const auto num_cells_x = this->getSizeInCellsX();
const auto num_cells_y = this->getSizeInCellsY();

cudaStreamCreate(&stream_);
device_costmap_ = autoware::cuda_utils::make_unique<std::uint8_t[]>(num_cells_x * num_cells_y);
device_costmap_aux_ =
autoware::cuda_utils::make_unique<std::uint8_t[]>(num_cells_x * num_cells_y);
Expand Down Expand Up @@ -136,8 +135,9 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori
device_costmap_.get(), lower_left_x, lower_left_y, size_x_, size_y_,
device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, cell_size_x, cell_size_y, stream_);

cudaMemset(
device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t));
cudaMemsetAsync(
device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t),
stream_);
} else {
local_map = new unsigned char[cell_size_x * cell_size_y];

Expand Down Expand Up @@ -207,6 +207,13 @@ bool OccupancyGridMapInterface::isCudaEnabled() const
return use_cuda_;
}

void OccupancyGridMapInterface::setCudaStream(const cudaStream_t & stream)
{
if (isCudaEnabled()) {
stream_ = stream;
}
}

const autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> &
OccupancyGridMapInterface::getDeviceCostmap() const
{
Expand All @@ -215,9 +222,10 @@ OccupancyGridMapInterface::getDeviceCostmap() const

void OccupancyGridMapInterface::copyDeviceCostmapToHost() const
{
cudaMemcpy(
cudaMemcpyAsync(
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
cudaMemcpyDeviceToHost);
cudaMemcpyDeviceToHost, stream_);
cudaStreamSynchronize(stream_);
}

} // namespace costmap_2d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,12 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud(
const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height;
float range_resolution_inv = 1.0 / map_res;

cudaStreamSynchronize(stream_);

map_fixed::prepareTensorLaunch(
reinterpret_cast<const float *>(raw_pointcloud.data.get()), num_raw_points,
raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_,
max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(),
device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(),
raw_points_tensor_.get(), raw_pointcloud.stream);
raw_points_tensor_.get(), stream_);

const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height;

Expand All @@ -141,27 +139,23 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud(
obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_,
max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(),
device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(),
obstacle_points_tensor_.get(), obstacle_pointcloud.stream);
obstacle_points_tensor_.get(), stream_);

map_fixed::fillEmptySpaceLaunch(
raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv,
scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y,
cost_value::FREE_SPACE, device_costmap_.get(), raw_pointcloud.stream);

cudaStreamSynchronize(obstacle_pointcloud.stream);
cost_value::FREE_SPACE, device_costmap_.get(), stream_);

map_fixed::fillUnknownSpaceLaunch(
raw_points_tensor_.get(), obstacle_points_tensor_.get(), distance_margin_, angle_bin_size,
range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_,
origin_y_, num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION,
device_costmap_.get(), raw_pointcloud.stream);
device_costmap_.get(), stream_);

map_fixed::fillObstaclesLaunch(
obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size,
range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y,
cost_value::LETHAL_OBSTACLE, device_costmap_.get(), raw_pointcloud.stream);

cudaStreamSynchronize(raw_pointcloud.stream);
cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_);
}

void OccupancyGridMapFixedBlindSpot::initRosParam(rclcpp::Node & node)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ void OccupancyGridMapBBFUpdater::initRosParam(rclcpp::Node & node)
}
}

cudaMemcpy(
cudaMemcpyAsync(
device_probability_matrix_.get(), probability_matrix_vector.data(),
sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice);
sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice, stream_);
}

inline unsigned char OccupancyGridMapBBFUpdater::applyBBF(
Expand Down Expand Up @@ -103,12 +103,6 @@ bool OccupancyGridMapBBFUpdater::update(
Index::NUM_STATES, Index::FREE, Index::OCCUPIED, cost_value::FREE_SPACE,
cost_value::LETHAL_OBSTACLE, cost_value::NO_INFORMATION, v_ratio_,
getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_);

cudaMemcpy(
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
cudaMemcpyDeviceToHost);

cudaStreamSynchronize(stream_);
} else {
for (unsigned int x = 0; x < getSizeInCellsX(); x++) {
for (unsigned int y = 0; y < getSizeInCellsY(); y++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ bool OccupancyGridMapLOBFUpdater::update(
applyLOBFLaunch(
single_frame_occupancy_grid_map.getDeviceCostmap().get(), cost_value::NO_INFORMATION,
getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_);

cudaMemcpy(
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
cudaMemcpyDeviceToHost);

cudaStreamSynchronize(stream_);
} else {
for (unsigned int x = 0; x < getSizeInCellsX(); x++) {
for (unsigned int y = 0; y < getSizeInCellsY(); y++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,13 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
const double map_resolution = this->declare_parameter<double>("map_resolution");

/* Subscriber and publisher */
obstacle_pointcloud_sub_.subscribe(
this, "~/input/obstacle_pointcloud",
rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile());
raw_pointcloud_sub_.subscribe(
this, "~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile());
sync_ptr_ = std::make_shared<Sync>(SyncPolicy(5), obstacle_pointcloud_sub_, raw_pointcloud_sub_);

sync_ptr_->registerCallback(
std::bind(&PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw, this, _1, _2));
obstacle_pointcloud_sub_ptr_ = this->create_subscription<PointCloud2>(
"~/input/obstacle_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
std::bind(&PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback, this, _1));
raw_pointcloud_sub_ptr_ = this->create_subscription<PointCloud2>(
"~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
std::bind(&PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback, this, _1));

occupancy_grid_map_pub_ = create_publisher<OccupancyGrid>("~/output/occupancy_grid_map", 1);

const std::string updater_type = this->declare_parameter<std::string>("updater_type");
Expand All @@ -94,7 +92,6 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
occupancy_grid_map_updater_ptr_ = std::make_unique<OccupancyGridMapBBFUpdater>(
true, map_length / map_resolution, map_length / map_resolution, map_resolution);
}
occupancy_grid_map_updater_ptr_->initRosParam(*this);

const std::string grid_map_type = this->declare_parameter<std::string>("grid_map_type");

Expand All @@ -118,7 +115,15 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
occupancy_grid_map_updater_ptr_->getSizeInCellsY(),
occupancy_grid_map_updater_ptr_->getResolution());
}

cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking);
raw_pointcloud_.stream = stream_;
obstacle_pointcloud_.stream = stream_;
occupancy_grid_map_ptr_->setCudaStream(stream_);
occupancy_grid_map_updater_ptr_->setCudaStream(stream_);

occupancy_grid_map_ptr_->initRosParam(*this);
occupancy_grid_map_updater_ptr_->initRosParam(*this);

// initialize debug tool
{
Expand All @@ -140,14 +145,29 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
time_keeper_ = std::make_shared<autoware::universe_utils::TimeKeeper>(time_keeper);
}
}
}

void PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback(
const PointCloud2::ConstSharedPtr & input_obstacle_msg)
{
obstacle_pointcloud_.fromROSMsgAsync(input_obstacle_msg);

cudaStreamCreate(&raw_pointcloud_.stream);
cudaStreamCreate(&obstacle_pointcloud_.stream);
if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) {
onPointcloudWithObstacleAndRaw();
}
}

void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
const PointCloud2::ConstSharedPtr & input_obstacle_msg,
void PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback(
const PointCloud2::ConstSharedPtr & input_raw_msg)
{
raw_pointcloud_.fromROSMsgAsync(input_raw_msg);

if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) {
onPointcloudWithObstacleAndRaw();
}
}

void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw()
{
std::unique_ptr<ScopedTimeTrack> st_ptr;
if (time_keeper_) st_ptr = std::make_unique<ScopedTimeTrack>(__func__, *time_keeper_);
Expand All @@ -156,9 +176,6 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
stop_watch_ptr_->toc("processing_time", true);
}

raw_pointcloud_.fromROSMsgAsync(*input_raw_msg);
obstacle_pointcloud_.fromROSMsgAsync(*input_obstacle_msg);

// if scan_origin_frame_ is "", replace it with raw_pointcloud_.header.frame_id
if (scan_origin_frame_.empty()) {
scan_origin_frame_ = raw_pointcloud_.header.frame_id;
Expand All @@ -172,7 +189,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
return;
}
}
if (input_obstacle_msg->header.frame_id != base_link_frame_) {
if (obstacle_pointcloud_.header.frame_id != base_link_frame_) {
if (!utils::transformPointcloudAsync(obstacle_pointcloud_, *tf2_, base_link_frame_)) {
return;
}
Expand Down Expand Up @@ -221,7 +238,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(

// publish
occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr(
map_frame_, input_raw_msg->header.stamp, robot_pose.position.z,
map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z,
*occupancy_grid_map_ptr_)); // (todo) robot_pose may be altered with gridmap_origin
} else {
std::unique_ptr<ScopedTimeTrack> inner_st_ptr;
Expand All @@ -231,10 +248,11 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(

// Update with bayes filter
occupancy_grid_map_updater_ptr_->update(*occupancy_grid_map_ptr_);
occupancy_grid_map_updater_ptr_->copyDeviceCostmapToHost();

// publish
occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr(
map_frame_, input_raw_msg->header.stamp, robot_pose.position.z,
map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z,
*occupancy_grid_map_updater_ptr_));
}

Expand All @@ -244,7 +262,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
const double pipeline_latency_ms =
std::chrono::duration<double, std::milli>(
std::chrono::nanoseconds(
(this->get_clock()->now() - input_raw_msg->header.stamp).nanoseconds()))
(this->get_clock()->now() - raw_pointcloud_.header.stamp).nanoseconds()))
.count();
debug_publisher_ptr_->publish<autoware_internal_debug_msgs::msg::Float64Stamped>(
"debug/cyclic_time_ms", cyclic_time_ms);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,7 @@
#include <sensor_msgs/msg/laser_scan.hpp>
#include <sensor_msgs/point_cloud2_iterator.hpp>

#include <message_filters/pass_through.h>
#include <message_filters/subscriber.h>
#include <message_filters/sync_policies/exact_time.h>
#include <message_filters/synchronizer.h>
#include <cuda_runtime.h>
#include <tf2_ros/buffer.h>
#include <tf2_ros/transform_listener.h>

Expand All @@ -59,30 +56,28 @@ class PointcloudBasedOccupancyGridMapNode : public rclcpp::Node
explicit PointcloudBasedOccupancyGridMapNode(const rclcpp::NodeOptions & node_options);

private:
void onPointcloudWithObstacleAndRaw(
const PointCloud2::ConstSharedPtr & input_obstacle_msg,
const PointCloud2::ConstSharedPtr & input_raw_msg);
void obstaclePointcloudCallback(const PointCloud2::ConstSharedPtr & input_obstacle_msg);
void rawPointcloudCallback(const PointCloud2::ConstSharedPtr & input_raw_msg);
void onPointcloudWithObstacleAndRaw();

OccupancyGrid::UniquePtr OccupancyGridMapToMsgPtr(
const std::string & frame_id, const Time & stamp, const float & robot_pose_z,
const Costmap2D & occupancy_grid_map);

private:
rclcpp::Publisher<OccupancyGrid>::SharedPtr occupancy_grid_map_pub_;
message_filters::Subscriber<PointCloud2> obstacle_pointcloud_sub_;
message_filters::Subscriber<PointCloud2> raw_pointcloud_sub_;
rclcpp::Subscription<PointCloud2>::SharedPtr obstacle_pointcloud_sub_ptr_;
rclcpp::Subscription<PointCloud2>::SharedPtr raw_pointcloud_sub_ptr_;
std::unique_ptr<autoware::universe_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{};
std::unique_ptr<autoware::universe_utils::DebugPublisher> debug_publisher_ptr_{};

std::shared_ptr<Buffer> tf2_{std::make_shared<Buffer>(get_clock())};
std::shared_ptr<TransformListener> tf2_listener_{std::make_shared<TransformListener>(*tf2_)};

using SyncPolicy = message_filters::sync_policies::ExactTime<PointCloud2, PointCloud2>;
using Sync = message_filters::Synchronizer<SyncPolicy>;
std::shared_ptr<Sync> sync_ptr_;

std::unique_ptr<OccupancyGridMapInterface> occupancy_grid_map_ptr_;
std::unique_ptr<OccupancyGridMapUpdaterInterface> occupancy_grid_map_updater_ptr_;

cudaStream_t stream_;
CudaPointCloud2 raw_pointcloud_;
CudaPointCloud2 obstacle_pointcloud_;

Expand Down

0 comments on commit c08e6b3

Please sign in to comment.