Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: sync awf-latest #1796

Merged
merged 1 commit into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ class OccupancyGridMapInterface : public nav2_costmap_2d::Costmap2D

bool isCudaEnabled() const;

void setCudaStream(const cudaStream_t & stream);

const autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> & getDeviceCostmap() const;

void copyDeviceCostmapToHost() const;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,31 +24,37 @@
class CudaPointCloud2 : public sensor_msgs::msg::PointCloud2
{
public:
void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2 & msg)
void fromROSMsgAsync(const sensor_msgs::msg::PointCloud2::ConstSharedPtr & msg_ptr)
{
// cSpell: ignore knzo25
// NOTE(knzo25): replace this with the cuda blackboard later
header = msg.header;
fields = msg.fields;
height = msg.height;
width = msg.width;
is_bigendian = msg.is_bigendian;
point_step = msg.point_step;
row_step = msg.row_step;
is_dense = msg.is_dense;

if (!data || capacity_ < msg.data.size()) {
capacity_ = 1024 * (msg.data.size() / 1024 + 1);
cudaStreamSynchronize(stream);
internal_msg_ = msg_ptr;

header = msg_ptr->header;
fields = msg_ptr->fields;
height = msg_ptr->height;
width = msg_ptr->width;
is_bigendian = msg_ptr->is_bigendian;
point_step = msg_ptr->point_step;
row_step = msg_ptr->row_step;
is_dense = msg_ptr->is_dense;

if (!data || capacity_ < msg_ptr->data.size()) {
const int factor = 4096 * point_step;
capacity_ = factor * (msg_ptr->data.size() / factor + 1);
data = autoware::cuda_utils::make_unique<std::uint8_t[]>(capacity_);
}

cudaMemcpyAsync(data.get(), msg.data.data(), msg.data.size(), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(
data.get(), msg_ptr->data.data(), msg_ptr->data.size(), cudaMemcpyHostToDevice, stream);
}

autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> data;
cudaStream_t stream;

private:
sensor_msgs::msg::PointCloud2::ConstSharedPtr internal_msg_;
std::size_t capacity_{0};
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ OccupancyGridMapInterface::OccupancyGridMapInterface(
const auto num_cells_x = this->getSizeInCellsX();
const auto num_cells_y = this->getSizeInCellsY();

cudaStreamCreate(&stream_);
device_costmap_ = autoware::cuda_utils::make_unique<std::uint8_t[]>(num_cells_x * num_cells_y);
device_costmap_aux_ =
autoware::cuda_utils::make_unique<std::uint8_t[]>(num_cells_x * num_cells_y);
Expand Down Expand Up @@ -136,8 +135,9 @@ void OccupancyGridMapInterface::updateOrigin(double new_origin_x, double new_ori
device_costmap_.get(), lower_left_x, lower_left_y, size_x_, size_y_,
device_costmap_aux_.get(), 0, 0, cell_size_x, cell_size_y, cell_size_x, cell_size_y, stream_);

cudaMemset(
device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t));
cudaMemsetAsync(
device_costmap_.get(), cost_value::NO_INFORMATION, size_x_ * size_y_ * sizeof(std::uint8_t),
stream_);
} else {
local_map = new unsigned char[cell_size_x * cell_size_y];

Expand Down Expand Up @@ -207,6 +207,13 @@ bool OccupancyGridMapInterface::isCudaEnabled() const
return use_cuda_;
}

void OccupancyGridMapInterface::setCudaStream(const cudaStream_t & stream)
{
if (isCudaEnabled()) {
stream_ = stream;
}
}

const autoware::cuda_utils::CudaUniquePtr<std::uint8_t[]> &
OccupancyGridMapInterface::getDeviceCostmap() const
{
Expand All @@ -215,9 +222,10 @@ OccupancyGridMapInterface::getDeviceCostmap() const

void OccupancyGridMapInterface::copyDeviceCostmapToHost() const
{
cudaMemcpy(
cudaMemcpyAsync(
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
cudaMemcpyDeviceToHost);
cudaMemcpyDeviceToHost, stream_);
cudaStreamSynchronize(stream_);
}

} // namespace costmap_2d
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,12 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud(
const std::size_t num_raw_points = raw_pointcloud.width * raw_pointcloud.height;
float range_resolution_inv = 1.0 / map_res;

cudaStreamSynchronize(stream_);

map_fixed::prepareTensorLaunch(
reinterpret_cast<const float *>(raw_pointcloud.data.get()), num_raw_points,
raw_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_,
max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(),
device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(),
raw_points_tensor_.get(), raw_pointcloud.stream);
raw_points_tensor_.get(), stream_);

const std::size_t num_obstacle_points = obstacle_pointcloud.width * obstacle_pointcloud.height;

Expand All @@ -141,27 +139,23 @@ void OccupancyGridMapFixedBlindSpot::updateWithPointCloud(
obstacle_pointcloud.point_step / sizeof(float), angle_bin_size, range_bin_size, min_height_,
max_height_, min_angle_, angle_increment_inv_, range_resolution_inv, device_rotation_map_.get(),
device_translation_map_.get(), device_rotation_scan_.get(), device_translation_scan_.get(),
obstacle_points_tensor_.get(), obstacle_pointcloud.stream);
obstacle_points_tensor_.get(), stream_);

map_fixed::fillEmptySpaceLaunch(
raw_points_tensor_.get(), angle_bin_size, range_bin_size, range_resolution_inv,
scan_origin.position.x, scan_origin.position.y, origin_x_, origin_y_, num_cells_x, num_cells_y,
cost_value::FREE_SPACE, device_costmap_.get(), raw_pointcloud.stream);

cudaStreamSynchronize(obstacle_pointcloud.stream);
cost_value::FREE_SPACE, device_costmap_.get(), stream_);

map_fixed::fillUnknownSpaceLaunch(
raw_points_tensor_.get(), obstacle_points_tensor_.get(), distance_margin_, angle_bin_size,
range_bin_size, range_resolution_inv, scan_origin.position.x, scan_origin.position.y, origin_x_,
origin_y_, num_cells_x, num_cells_y, cost_value::FREE_SPACE, cost_value::NO_INFORMATION,
device_costmap_.get(), raw_pointcloud.stream);
device_costmap_.get(), stream_);

map_fixed::fillObstaclesLaunch(
obstacle_points_tensor_.get(), distance_margin_, angle_bin_size, range_bin_size,
range_resolution_inv, origin_x_, origin_y_, num_cells_x, num_cells_y,
cost_value::LETHAL_OBSTACLE, device_costmap_.get(), raw_pointcloud.stream);

cudaStreamSynchronize(raw_pointcloud.stream);
cost_value::LETHAL_OBSTACLE, device_costmap_.get(), stream_);
}

void OccupancyGridMapFixedBlindSpot::initRosParam(rclcpp::Node & node)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ void OccupancyGridMapBBFUpdater::initRosParam(rclcpp::Node & node)
}
}

cudaMemcpy(
cudaMemcpyAsync(
device_probability_matrix_.get(), probability_matrix_vector.data(),
sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice);
sizeof(float) * Index::NUM_STATES * Index::NUM_STATES, cudaMemcpyHostToDevice, stream_);
}

inline unsigned char OccupancyGridMapBBFUpdater::applyBBF(
Expand Down Expand Up @@ -103,12 +103,6 @@ bool OccupancyGridMapBBFUpdater::update(
Index::NUM_STATES, Index::FREE, Index::OCCUPIED, cost_value::FREE_SPACE,
cost_value::LETHAL_OBSTACLE, cost_value::NO_INFORMATION, v_ratio_,
getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_);

cudaMemcpy(
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
cudaMemcpyDeviceToHost);

cudaStreamSynchronize(stream_);
} else {
for (unsigned int x = 0; x < getSizeInCellsX(); x++) {
for (unsigned int y = 0; y < getSizeInCellsY(); y++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ bool OccupancyGridMapLOBFUpdater::update(
applyLOBFLaunch(
single_frame_occupancy_grid_map.getDeviceCostmap().get(), cost_value::NO_INFORMATION,
getSizeInCellsX() * getSizeInCellsY(), device_costmap_.get(), stream_);

cudaMemcpy(
costmap_, device_costmap_.get(), getSizeInCellsX() * getSizeInCellsY() * sizeof(std::uint8_t),
cudaMemcpyDeviceToHost);

cudaStreamSynchronize(stream_);
} else {
for (unsigned int x = 0; x < getSizeInCellsX(); x++) {
for (unsigned int y = 0; y < getSizeInCellsY(); y++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,13 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
const double map_resolution = this->declare_parameter<double>("map_resolution");

/* Subscriber and publisher */
obstacle_pointcloud_sub_.subscribe(
this, "~/input/obstacle_pointcloud",
rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile());
raw_pointcloud_sub_.subscribe(
this, "~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1).get_rmw_qos_profile());
sync_ptr_ = std::make_shared<Sync>(SyncPolicy(5), obstacle_pointcloud_sub_, raw_pointcloud_sub_);

sync_ptr_->registerCallback(
std::bind(&PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw, this, _1, _2));
obstacle_pointcloud_sub_ptr_ = this->create_subscription<PointCloud2>(
"~/input/obstacle_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
std::bind(&PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback, this, _1));
raw_pointcloud_sub_ptr_ = this->create_subscription<PointCloud2>(
"~/input/raw_pointcloud", rclcpp::SensorDataQoS{}.keep_last(1),
std::bind(&PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback, this, _1));

occupancy_grid_map_pub_ = create_publisher<OccupancyGrid>("~/output/occupancy_grid_map", 1);

const std::string updater_type = this->declare_parameter<std::string>("updater_type");
Expand All @@ -94,7 +92,6 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
occupancy_grid_map_updater_ptr_ = std::make_unique<OccupancyGridMapBBFUpdater>(
true, map_length / map_resolution, map_length / map_resolution, map_resolution);
}
occupancy_grid_map_updater_ptr_->initRosParam(*this);

const std::string grid_map_type = this->declare_parameter<std::string>("grid_map_type");

Expand All @@ -118,7 +115,15 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
occupancy_grid_map_updater_ptr_->getSizeInCellsY(),
occupancy_grid_map_updater_ptr_->getResolution());
}

cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking);
raw_pointcloud_.stream = stream_;
obstacle_pointcloud_.stream = stream_;
occupancy_grid_map_ptr_->setCudaStream(stream_);
occupancy_grid_map_updater_ptr_->setCudaStream(stream_);

occupancy_grid_map_ptr_->initRosParam(*this);
occupancy_grid_map_updater_ptr_->initRosParam(*this);

// initialize debug tool
{
Expand All @@ -140,14 +145,29 @@ PointcloudBasedOccupancyGridMapNode::PointcloudBasedOccupancyGridMapNode(
time_keeper_ = std::make_shared<autoware::universe_utils::TimeKeeper>(time_keeper);
}
}
}

void PointcloudBasedOccupancyGridMapNode::obstaclePointcloudCallback(
const PointCloud2::ConstSharedPtr & input_obstacle_msg)
{
obstacle_pointcloud_.fromROSMsgAsync(input_obstacle_msg);

cudaStreamCreate(&raw_pointcloud_.stream);
cudaStreamCreate(&obstacle_pointcloud_.stream);
if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) {
onPointcloudWithObstacleAndRaw();
}
}

void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
const PointCloud2::ConstSharedPtr & input_obstacle_msg,
void PointcloudBasedOccupancyGridMapNode::rawPointcloudCallback(
const PointCloud2::ConstSharedPtr & input_raw_msg)
{
raw_pointcloud_.fromROSMsgAsync(input_raw_msg);

if (obstacle_pointcloud_.header.stamp == raw_pointcloud_.header.stamp) {
onPointcloudWithObstacleAndRaw();
}
}

void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw()
{
std::unique_ptr<ScopedTimeTrack> st_ptr;
if (time_keeper_) st_ptr = std::make_unique<ScopedTimeTrack>(__func__, *time_keeper_);
Expand All @@ -156,9 +176,6 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
stop_watch_ptr_->toc("processing_time", true);
}

raw_pointcloud_.fromROSMsgAsync(*input_raw_msg);
obstacle_pointcloud_.fromROSMsgAsync(*input_obstacle_msg);

// if scan_origin_frame_ is "", replace it with raw_pointcloud_.header.frame_id
if (scan_origin_frame_.empty()) {
scan_origin_frame_ = raw_pointcloud_.header.frame_id;
Expand All @@ -172,7 +189,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
return;
}
}
if (input_obstacle_msg->header.frame_id != base_link_frame_) {
if (obstacle_pointcloud_.header.frame_id != base_link_frame_) {
if (!utils::transformPointcloudAsync(obstacle_pointcloud_, *tf2_, base_link_frame_)) {
return;
}
Expand Down Expand Up @@ -221,7 +238,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(

// publish
occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr(
map_frame_, input_raw_msg->header.stamp, robot_pose.position.z,
map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z,
*occupancy_grid_map_ptr_)); // (todo) robot_pose may be altered with gridmap_origin
} else {
std::unique_ptr<ScopedTimeTrack> inner_st_ptr;
Expand All @@ -231,10 +248,11 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(

// Update with bayes filter
occupancy_grid_map_updater_ptr_->update(*occupancy_grid_map_ptr_);
occupancy_grid_map_updater_ptr_->copyDeviceCostmapToHost();

// publish
occupancy_grid_map_pub_->publish(OccupancyGridMapToMsgPtr(
map_frame_, input_raw_msg->header.stamp, robot_pose.position.z,
map_frame_, raw_pointcloud_.header.stamp, robot_pose.position.z,
*occupancy_grid_map_updater_ptr_));
}

Expand All @@ -244,7 +262,7 @@ void PointcloudBasedOccupancyGridMapNode::onPointcloudWithObstacleAndRaw(
const double pipeline_latency_ms =
std::chrono::duration<double, std::milli>(
std::chrono::nanoseconds(
(this->get_clock()->now() - input_raw_msg->header.stamp).nanoseconds()))
(this->get_clock()->now() - raw_pointcloud_.header.stamp).nanoseconds()))
.count();
debug_publisher_ptr_->publish<autoware_internal_debug_msgs::msg::Float64Stamped>(
"debug/cyclic_time_ms", cyclic_time_ms);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,7 @@
#include <sensor_msgs/msg/laser_scan.hpp>
#include <sensor_msgs/point_cloud2_iterator.hpp>

#include <message_filters/pass_through.h>
#include <message_filters/subscriber.h>
#include <message_filters/sync_policies/exact_time.h>
#include <message_filters/synchronizer.h>
#include <cuda_runtime.h>
#include <tf2_ros/buffer.h>
#include <tf2_ros/transform_listener.h>

Expand All @@ -59,30 +56,28 @@ class PointcloudBasedOccupancyGridMapNode : public rclcpp::Node
explicit PointcloudBasedOccupancyGridMapNode(const rclcpp::NodeOptions & node_options);

private:
void onPointcloudWithObstacleAndRaw(
const PointCloud2::ConstSharedPtr & input_obstacle_msg,
const PointCloud2::ConstSharedPtr & input_raw_msg);
void obstaclePointcloudCallback(const PointCloud2::ConstSharedPtr & input_obstacle_msg);
void rawPointcloudCallback(const PointCloud2::ConstSharedPtr & input_raw_msg);
void onPointcloudWithObstacleAndRaw();

OccupancyGrid::UniquePtr OccupancyGridMapToMsgPtr(
const std::string & frame_id, const Time & stamp, const float & robot_pose_z,
const Costmap2D & occupancy_grid_map);

private:
rclcpp::Publisher<OccupancyGrid>::SharedPtr occupancy_grid_map_pub_;
message_filters::Subscriber<PointCloud2> obstacle_pointcloud_sub_;
message_filters::Subscriber<PointCloud2> raw_pointcloud_sub_;
rclcpp::Subscription<PointCloud2>::SharedPtr obstacle_pointcloud_sub_ptr_;
rclcpp::Subscription<PointCloud2>::SharedPtr raw_pointcloud_sub_ptr_;
std::unique_ptr<autoware::universe_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_{};
std::unique_ptr<autoware::universe_utils::DebugPublisher> debug_publisher_ptr_{};

std::shared_ptr<Buffer> tf2_{std::make_shared<Buffer>(get_clock())};
std::shared_ptr<TransformListener> tf2_listener_{std::make_shared<TransformListener>(*tf2_)};

using SyncPolicy = message_filters::sync_policies::ExactTime<PointCloud2, PointCloud2>;
using Sync = message_filters::Synchronizer<SyncPolicy>;
std::shared_ptr<Sync> sync_ptr_;

std::unique_ptr<OccupancyGridMapInterface> occupancy_grid_map_ptr_;
std::unique_ptr<OccupancyGridMapUpdaterInterface> occupancy_grid_map_updater_ptr_;

cudaStream_t stream_;
CudaPointCloud2 raw_pointcloud_;
CudaPointCloud2 obstacle_pointcloud_;

Expand Down
Loading