From 1dc97260df11a0cc659548e85a0227642d5a96b4 Mon Sep 17 00:00:00 2001 From: Kenzo Lobos-Tsunekawa Date: Wed, 19 Mar 2025 10:09:36 +0900 Subject: [PATCH] feat: cuda accelerated version of the pointcloud concatenation Signed-off-by: Kenzo Lobos-Tsunekawa --- .../CMakeLists.txt | 13 + .../README.md | 7 +- ..._concatenate_and_time_sync_node.param.yaml | 21 ++ .../docs/cuda-concatenate-data.md | 6 + .../cuda_cloud_collector.hpp | 18 ++ .../cuda_combine_cloud_handler.hpp | 53 ++++ .../cuda_combine_cloud_handler_kernel.hpp | 57 ++++ .../cuda_concatenate_and_time_sync_node.hpp | 42 +++ .../cuda_concatenate_data/cuda_traits.hpp | 31 ++ ..._concatenate_and_time_sync_node.launch.xml | 11 + ...concatenate_and_time_sync_node.schema.json | 163 +++++++++++ .../cuda_cloud_collector.cpp | 20 ++ .../cuda_collector_matching_strategy.cpp | 21 ++ .../cuda_combine_cloud_handler.cpp | 269 ++++++++++++++++++ .../cuda_combine_cloud_handler_kernel.cu | 55 ++++ .../cuda_concatenate_and_time_sync_node.cpp | 73 +++++ 16 files changed, 857 insertions(+), 3 deletions(-) create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/config/cuda_concatenate_and_time_sync_node.param.yaml create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/docs/cuda-concatenate-data.md create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_cloud_collector.hpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler.hpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.hpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.hpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/launch/cuda_concatenate_and_time_sync_node.launch.xml create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/schema/concatenate_and_time_sync_node.schema.json create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_cloud_collector.cpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_collector_matching_strategy.cpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler.cpp create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.cu create mode 100644 sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.cpp diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/CMakeLists.txt b/sensing/autoware_cuda_pointcloud_preprocessor/CMakeLists.txt index 3a77c27ce5f8b..0f686e962d530 100644 --- a/sensing/autoware_cuda_pointcloud_preprocessor/CMakeLists.txt +++ b/sensing/autoware_cuda_pointcloud_preprocessor/CMakeLists.txt @@ -37,6 +37,10 @@ include_directories( list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -diag-suppress 20012") # cSpell: ignore expt cuda_add_library(cuda_pointcloud_preprocessor_lib SHARED + src/cuda_concatenate_data/cuda_combine_cloud_handler.cpp + src/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.cu + src/cuda_concatenate_data/cuda_cloud_collector.cpp + src/cuda_concatenate_data/cuda_collector_matching_strategy.cpp src/cuda_pointcloud_preprocessor/cuda_pointcloud_preprocessor.cu src/cuda_pointcloud_preprocessor/common_kernels.cu src/cuda_pointcloud_preprocessor/organize_kernels.cu @@ -65,14 +69,23 @@ target_include_directories(cuda_pointcloud_preprocessor_lib SYSTEM PRIVATE # Targets ament_auto_add_library(cuda_pointcloud_preprocessor SHARED + src/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.cpp src/cuda_pointcloud_preprocessor/cuda_pointcloud_preprocessor_node.cpp ) target_link_libraries(cuda_pointcloud_preprocessor ${CUDA_LIBRARIES} + ${diagnostic_msgs_LIBRARIES} + ${diagnostic_msgs_LIBRARIES} cuda_pointcloud_preprocessor_lib ) +# ========== Concatenate and Sync data ========== +rclcpp_components_register_node(cuda_pointcloud_preprocessor + PLUGIN "autoware::cuda_pointcloud_preprocessor::CudaPointCloudConcatenateDataSynchronizerComponent" + EXECUTABLE cuda_concatenate_and_time_sync_node) + +# ========== Concatenate and Sync data ========== rclcpp_components_register_node(cuda_pointcloud_preprocessor PLUGIN "autoware::cuda_pointcloud_preprocessor::CudaPointcloudPreprocessorNode" EXECUTABLE cuda_pointcloud_preprocessor_node diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/README.md b/sensing/autoware_cuda_pointcloud_preprocessor/README.md index 0fa5ab7e7c65a..ceed68ca07a83 100644 --- a/sensing/autoware_cuda_pointcloud_preprocessor/README.md +++ b/sensing/autoware_cuda_pointcloud_preprocessor/README.md @@ -10,9 +10,10 @@ To alleviate this issue, this package reimplements most of the pipeline presente A detailed description of each filter's algorithm is available in the following links. -| Filter Name | Description | Detail | -| ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------- | -| cuda_pointcloud_preprocessor | Implements the cropping, distortion correction, and outlier filtering (ring-based) of the `autoware_pointcloud_preprocessor`'s CPU versions. | [link](docs/cuda-pointcloud-preprocessor.md) | +| Filter Name | Description | Detail | +| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------- | +| cuda_pointcloud_preprocessor | Implements the cropping, distortion correction, and outlier filtering (ring-based) of the `autoware_pointcloud_preprocessor`'s CPU versions. | [link](docs/cuda-pointcloud-preprocessor.md) | +| cuda_concatenate_and_time_sync_node | Implements pointcloud concatenation an synchronization following `autoware_pointcloud_preprocessor`'s CPU implementation. | [link](docs/cuda-concatenate-data.md) | ## (Optional) Future extensions / Unimplemented parts diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/config/cuda_concatenate_and_time_sync_node.param.yaml b/sensing/autoware_cuda_pointcloud_preprocessor/config/cuda_concatenate_and_time_sync_node.param.yaml new file mode 100644 index 0000000000000..56fe6643b9973 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/config/cuda_concatenate_and_time_sync_node.param.yaml @@ -0,0 +1,21 @@ +/**: + ros__parameters: + debug_mode: false + has_static_tf_only: false + rosbag_length: 10.0 + maximum_queue_size: 5 + timeout_sec: 0.2 + is_motion_compensated: true + publish_synchronized_pointcloud: true + keep_input_frame_in_synchronized_pointcloud: true + publish_previous_but_late_pointcloud: false + synchronized_pointcloud_postfix: pointcloud + input_twist_topic_type: twist + input_topics: [ + "/sensing/lidar/right/pointcloud_before_sync", + "/sensing/lidar/top/pointcloud_before_sync", + "/sensing/lidar/left/pointcloud_before_sync", + ] + output_frame: base_link + matching_strategy: + type: naive diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/docs/cuda-concatenate-data.md b/sensing/autoware_cuda_pointcloud_preprocessor/docs/cuda-concatenate-data.md new file mode 100644 index 0000000000000..70de6d36228fe --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/docs/cuda-concatenate-data.md @@ -0,0 +1,6 @@ +# cuda_concatenate_and_time_synchronize_node + +This package is a cuda accelerated version of the one available in [autoware_cuda_pointcloud_preprocessor](../autoware_pointcloud_preprocessor). +As this node is templated, the overall design, algorithm, inputs, and outputs are the same. + +The only change, corresponds to the pointcloud topics, which instead of using the standard `sensor_msgs::msg::PointCloud2` message type, they use the `cuda_blackboard` mechanism. diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_cloud_collector.hpp b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_cloud_collector.hpp new file mode 100644 index 0000000000000..b6f78dea17b5f --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_cloud_collector.hpp @@ -0,0 +1,18 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "autoware/pointcloud_preprocessor/concatenate_data/cloud_collector.hpp" +#include "cuda_combine_cloud_handler.hpp" diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler.hpp b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler.hpp new file mode 100644 index 0000000000000..34e689428c0c2 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler.hpp @@ -0,0 +1,53 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp" +#include "autoware/pointcloud_preprocessor/concatenate_data/combine_cloud_handler.hpp" + +namespace autoware::pointcloud_preprocessor +{ + +template <> +class CombineCloudHandler : public CombineCloudHandlerBase +{ +protected: + struct CudaConcatStruct + { + cudaStream_t stream; + std::unique_ptr cloud_ptr; + std::size_t max_pointcloud_size{0}; + }; + + std::unordered_map cuda_concat_struct_map_; + std::unique_ptr concatenated_cloud_ptr_; + std::size_t max_concat_pointcloud_size_{0}; + std::mutex mutex_; + +public: + CombineCloudHandler( + rclcpp::Node & node, const std::vector & input_topics, std::string output_frame, + bool is_motion_compensated, bool publish_synchronized_pointcloud, + bool keep_input_frame_in_synchronized_pointcloud, bool has_static_tf_only); + + ConcatenatedCloudResult combine_pointclouds( + std::unordered_map< + std::string, typename CudaPointCloud2Traits::PointCloudMessage::ConstSharedPtr> & + topic_to_cloud_map); + + void allocate_pointclouds() override; +}; + +} // namespace autoware::pointcloud_preprocessor diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.hpp b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.hpp new file mode 100644 index 0000000000000..19bf5e172d4d8 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.hpp @@ -0,0 +1,57 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef AUTOWARE__CUDA_POINTCLOUD_PREPROCESSOR__CUDA_CONCATENATE_DATA__CUDA_COMBINE_CLOUD_HANDLER_KERNEL_HPP_ +#define AUTOWARE__CUDA_POINTCLOUD_PREPROCESSOR__CUDA_CONCATENATE_DATA__CUDA_COMBINE_CLOUD_HANDLER_KERNEL_HPP_ + +#include + +#include + +namespace autoware::pointcloud_preprocessor +{ + +struct TransformStruct +{ + float translation_x; + float translation_y; + float translation_z; + float m11; + float m12; + float m13; + float m21; + float m22; + float m23; + float m31; + float m32; + float m33; +}; + +struct PointTypeStruct +{ + float x; + float y; + float z; + std::uint8_t intensity; + std::uint8_t return_type; + std::uint16_t channel; +}; + +void transform_launch( + const PointTypeStruct * input_points, int num_points, TransformStruct transform, + PointTypeStruct * output_points, cudaStream_t & stream); + +} // namespace autoware::pointcloud_preprocessor + +#endif // AUTOWARE__CUDA_POINTCLOUD_PREPROCESSOR__CUDA_CONCATENATE_DATA__CUDA_COMBINE_CLOUD_HANDLER_KERNEL_HPP_ diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.hpp b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.hpp new file mode 100644 index 0000000000000..d239ba08a1a64 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.hpp @@ -0,0 +1,42 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp" +#include "autoware/pointcloud_preprocessor/concatenate_data/concatenate_and_time_sync_node.hpp" + +#include +#include +#include +#include + +namespace autoware::cuda_pointcloud_preprocessor +{ + +class CudaPointCloudConcatenateDataSynchronizerComponent +: public autoware::pointcloud_preprocessor::PointCloudConcatenateDataSynchronizerComponentTemplated< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits> +{ +public: + explicit CudaPointCloudConcatenateDataSynchronizerComponent( + const rclcpp::NodeOptions & node_options) + : autoware::pointcloud_preprocessor::PointCloudConcatenateDataSynchronizerComponentTemplated< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits>(node_options) + { + } + ~CudaPointCloudConcatenateDataSynchronizerComponent() override = default; +}; + +} // namespace autoware::cuda_pointcloud_preprocessor diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp new file mode 100644 index 0000000000000..0e6dadbf094f4 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/include/autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp @@ -0,0 +1,31 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace autoware::pointcloud_preprocessor +{ + +struct CudaPointCloud2Traits +{ + using PointCloudMessage = cuda_blackboard::CudaPointCloud2; + using PublisherType = cuda_blackboard::CudaBlackboardPublisher; + using SubscriberType = cuda_blackboard::CudaBlackboardSubscriber; +}; + +} // namespace autoware::pointcloud_preprocessor diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/launch/cuda_concatenate_and_time_sync_node.launch.xml b/sensing/autoware_cuda_pointcloud_preprocessor/launch/cuda_concatenate_and_time_sync_node.launch.xml new file mode 100644 index 0000000000000..1033da42b1dd0 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/launch/cuda_concatenate_and_time_sync_node.launch.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/schema/concatenate_and_time_sync_node.schema.json b/sensing/autoware_cuda_pointcloud_preprocessor/schema/concatenate_and_time_sync_node.schema.json new file mode 100644 index 0000000000000..7a71b23dc841b --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/schema/concatenate_and_time_sync_node.schema.json @@ -0,0 +1,163 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Parameters for Cuda Concatenate and Time Synchronize Node", + "type": "object", + "definitions": { + "cuda_concatenate_and_time_sync_node": { + "type": "object", + "properties": { + "debug_mode": { + "type": "boolean", + "default": false, + "description": "Flag to enable debug mode to display additional logging information." + }, + "has_static_tf_only": { + "type": "boolean", + "default": false, + "description": "Flag to indicate if only static TF is used." + }, + "rosbag_length": { + "type": "number", + "default": 10.0, + "minimum": 0.0, + "description": " This value determine if the rosbag has started from the beginning again. The value should be set smaller than the actual length of the bag." + }, + "maximum_queue_size": { + "type": "integer", + "default": "5", + "minimum": 1, + "description": "Maximum size of the queue for the Keep Last policy in QoS history." + }, + "timeout_sec": { + "type": "number", + "default": "0.1", + "minimum": 0.001, + "description": "Timer's timeout duration in seconds when collectors are created. Collectors will concatenate the point clouds when timeout_sec reaches zero." + }, + "is_motion_compensated": { + "type": "boolean", + "default": true, + "description": "Flag to indicate if motion compensation is enabled." + }, + "publish_synchronized_pointcloud": { + "type": "boolean", + "default": true, + "description": "Flag to indicate if synchronized point cloud should be published." + }, + "keep_input_frame_in_synchronized_pointcloud": { + "type": "boolean", + "default": true, + "description": "Flag to indicate if input frame should be kept in synchronized point cloud." + }, + "publish_previous_but_late_pointcloud": { + "type": "boolean", + "default": false, + "description": "Flag to indicate if a concatenated point cloud should be published if its timestamp is earlier than the previous published concatenated point cloud." + }, + "synchronized_pointcloud_postfix": { + "type": "string", + "default": "pointcloud", + "description": "Postfix for the topic name of the synchronized point cloud." + }, + "input_twist_topic_type": { + "type": "string", + "enum": ["twist", "odom"], + "default": "twist", + "description": "Type of the input twist topic." + }, + "input_topics": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "default": ["cloud_topic1", "cloud_topic2", "cloud_topic3"], + "minItems": 2, + "description": "List of input point cloud topics." + }, + "output_frame": { + "type": "string", + "default": "base_link", + "minLength": 1, + "description": "Output frame." + }, + "matching_strategy": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["naive", "advanced"], + "default": "advanced", + "description": "Set it to `advanced` if you can synchronize your LiDAR sensor; otherwise, set it to `naive`." + }, + "lidar_timestamp_offsets": { + "type": "array", + "items": { + "type": "number", + "minimum": 0.0 + }, + "default": [0.0, 0.0, 0.0], + "minItems": 2, + "description": "List of LiDAR timestamp offsets in seconds (relative to the earliest LiDAR timestamp). The offsets should be provided in the same order as the input topics." + }, + "lidar_timestamp_noise_window": { + "type": "array", + "items": { + "type": "number", + "minimum": 0.0 + }, + "default": [0.01, 0.01, 0.01], + "minItems": 2, + "description": "List of LiDAR timestamp noise windows in seconds. The noise values should be specified in the same order as the input_topics." + } + }, + "required": ["type"], + "dependencies": { + "type": { + "oneOf": [ + { + "properties": { "type": { "const": "naive" } }, + "not": { + "required": ["lidar_timestamp_offsets", "lidar_timestamp_noise_window"] + } + }, + { + "properties": { "type": { "const": "advanced" } }, + "required": ["lidar_timestamp_offsets", "lidar_timestamp_noise_window"] + } + ] + } + } + } + }, + "required": [ + "debug_mode", + "has_static_tf_only", + "rosbag_length", + "maximum_queue_size", + "timeout_sec", + "is_motion_compensated", + "publish_synchronized_pointcloud", + "keep_input_frame_in_synchronized_pointcloud", + "publish_previous_but_late_pointcloud", + "synchronized_pointcloud_postfix", + "input_twist_topic_type", + "input_topics", + "output_frame", + "matching_strategy" + ] + } + }, + "properties": { + "/**": { + "type": "object", + "properties": { + "ros__parameters": { + "$ref": "#/definitions/cuda_concatenate_and_time_sync_node" + } + }, + "required": ["ros__parameters"] + } + }, + "required": ["/**"] +} diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_cloud_collector.cpp b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_cloud_collector.cpp new file mode 100644 index 0000000000000..70686d6bfb1bd --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_cloud_collector.cpp @@ -0,0 +1,20 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_cloud_collector.hpp" + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp" + +template class autoware::pointcloud_preprocessor::CloudCollector< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits>; diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_collector_matching_strategy.cpp b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_collector_matching_strategy.cpp new file mode 100644 index 0000000000000..ca183e1f39f01 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_collector_matching_strategy.cpp @@ -0,0 +1,21 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp" +#include "autoware/pointcloud_preprocessor/concatenate_data/collector_matching_strategy.hpp" + +template class autoware::pointcloud_preprocessor::NaiveMatchingStrategy< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits>; +template class autoware::pointcloud_preprocessor::AdvancedMatchingStrategy< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits>; diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler.cpp b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler.cpp new file mode 100644 index 0000000000000..c106035be068f --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler.cpp @@ -0,0 +1,269 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler.hpp" + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.hpp" +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp" + +#include + +#include + +#include +#include +#include +#include + +#define CHECK_OFFSET(structure1, structure2, field) \ + static_assert( \ + offsetof(structure1, field) == offsetof(structure2, field), \ + "Offset of " #field " in " #structure1 " does not match expected offset.") + +static_assert( + sizeof(autoware::pointcloud_preprocessor::PointTypeStruct) == + sizeof(autoware::point_types::PointXYZIRC)); + +CHECK_OFFSET( + autoware::pointcloud_preprocessor::PointTypeStruct, autoware::point_types::PointXYZIRCAEDT, x); +CHECK_OFFSET( + autoware::pointcloud_preprocessor::PointTypeStruct, autoware::point_types::PointXYZIRCAEDT, y); +CHECK_OFFSET( + autoware::pointcloud_preprocessor::PointTypeStruct, autoware::point_types::PointXYZIRCAEDT, z); +CHECK_OFFSET( + autoware::pointcloud_preprocessor::PointTypeStruct, autoware::point_types::PointXYZIRCAEDT, + intensity); +CHECK_OFFSET( + autoware::pointcloud_preprocessor::PointTypeStruct, autoware::point_types::PointXYZIRCAEDT, + return_type); +CHECK_OFFSET( + autoware::pointcloud_preprocessor::PointTypeStruct, autoware::point_types::PointXYZIRCAEDT, + channel); + +namespace autoware::pointcloud_preprocessor +{ + +CombineCloudHandler::CombineCloudHandler( + rclcpp::Node & node, const std::vector & input_topics, std::string output_frame, + bool is_motion_compensated, bool publish_synchronized_pointcloud, + bool keep_input_frame_in_synchronized_pointcloud, bool has_static_tf_only) +: CombineCloudHandlerBase( + node, input_topics, output_frame, is_motion_compensated, publish_synchronized_pointcloud, + keep_input_frame_in_synchronized_pointcloud, has_static_tf_only) +{ + for (const auto & topic : input_topics_) { + CudaConcatStruct cuda_concat_struct; + cudaStreamCreate(&cuda_concat_struct.stream); + cuda_concat_struct_map_[topic] = std::move(cuda_concat_struct); + } +} + +void CombineCloudHandler::allocate_pointclouds() +{ + std::lock_guard lock(mutex_); + + for (const auto & topic : input_topics_) { + auto & concat_struct = cuda_concat_struct_map_[topic]; + concat_struct.cloud_ptr = std::make_unique(); + concat_struct.cloud_ptr->data = + cuda_blackboard::make_unique(concat_struct.max_pointcloud_size); + } + + concatenated_cloud_ptr_ = std::make_unique(); + concatenated_cloud_ptr_->data = cuda_blackboard::make_unique( + max_concat_pointcloud_size_ * input_topics_.size()); +} + +ConcatenatedCloudResult +CombineCloudHandler::combine_pointclouds( + std::unordered_map< + std::string, typename CudaPointCloud2Traits::PointCloudMessage::ConstSharedPtr> & + topic_to_cloud_map) +{ + ConcatenatedCloudResult concatenate_cloud_result; + std::lock_guard lock(mutex_); + + if (topic_to_cloud_map.empty()) return concatenate_cloud_result; + + std::vector pc_stamps; + pc_stamps.reserve(topic_to_cloud_map.size()); + + for (const auto & [topic, cloud] : topic_to_cloud_map) { + pc_stamps.emplace_back(cloud->header.stamp); + } + + std::sort(pc_stamps.begin(), pc_stamps.end(), std::greater()); + auto oldest_stamp = pc_stamps.back(); + + // Before combining the pointclouds, initialize and reserve space for the concatenated pointcloud + concatenate_cloud_result.concatenate_cloud_ptr = + std::make_unique(); + + // Reserve space based on the total size of the pointcloud data to speed up the concatenation + // process + size_t total_data_size = 0; + size_t total_points = 0; + for (const auto & [topic, cloud] : topic_to_cloud_map) { + total_data_size += (cloud->height * cloud->row_step); + total_points += (cloud->height * cloud->width); + } + + const auto point_fields = topic_to_cloud_map.begin()->second->fields; + + if (total_data_size > max_concat_pointcloud_size_ || !concatenated_cloud_ptr_) { + max_concat_pointcloud_size_ = (total_data_size + 1024) / 1024 * 1024; + concatenated_cloud_ptr_ = std::make_unique(); + concatenated_cloud_ptr_->data = cuda_blackboard::make_unique( + max_concat_pointcloud_size_ * input_topics_.size()); + } + + concatenate_cloud_result.concatenate_cloud_ptr = std::move(concatenated_cloud_ptr_); + + PointTypeStruct * output_points = + reinterpret_cast(concatenate_cloud_result.concatenate_cloud_ptr->data.get()); + std::size_t concatenated_start_index = 0; + + for (const auto & [topic, cloud] : topic_to_cloud_map) { + const std::size_t num_points = cloud->height * cloud->width; + + // Compute motion compensation transform + Eigen::Matrix4f transform = Eigen::Matrix4f::Identity(); + + // Transform if needed + managed_tf_buffer_->get_transform(output_frame_, cloud->header.frame_id, transform); + + rclcpp::Time current_cloud_stamp = rclcpp::Time(cloud->header.stamp); + + if (is_motion_compensated_) { + transform = compute_transform_to_adjust_for_old_timestamp(oldest_stamp, current_cloud_stamp) * + transform; + } + + TransformStruct transform_struct; + transform_struct.translation_x = transform(0, 3); + transform_struct.translation_y = transform(1, 3); + transform_struct.translation_z = transform(2, 3); + transform_struct.m11 = transform(0, 0); + transform_struct.m12 = transform(0, 1); + transform_struct.m13 = transform(0, 2); + transform_struct.m21 = transform(1, 0); + transform_struct.m22 = transform(1, 1); + transform_struct.m23 = transform(1, 2); + transform_struct.m31 = transform(2, 0); + transform_struct.m32 = transform(2, 1); + transform_struct.m33 = transform(2, 2); + + auto & stream = cuda_concat_struct_map_[topic].stream; + transform_launch( + reinterpret_cast(cloud->data.get()), num_points, transform_struct, + output_points + concatenated_start_index, stream); + concatenated_start_index += num_points; + } + + concatenate_cloud_result.concatenate_cloud_ptr->header.frame_id = output_frame_; + concatenate_cloud_result.concatenate_cloud_ptr->width = concatenated_start_index; + concatenate_cloud_result.concatenate_cloud_ptr->height = 1; + concatenate_cloud_result.concatenate_cloud_ptr->point_step = sizeof(PointTypeStruct); + concatenate_cloud_result.concatenate_cloud_ptr->row_step = + concatenated_start_index * sizeof(PointTypeStruct); + concatenate_cloud_result.concatenate_cloud_ptr->fields = point_fields; + concatenate_cloud_result.concatenate_cloud_ptr->is_bigendian = false; + concatenate_cloud_result.concatenate_cloud_ptr->is_dense = true; + + // Second round is for when we need to publish sync pointclouds + if (publish_synchronized_pointcloud_) { + if (!concatenate_cloud_result.topic_to_transformed_cloud_map) { + // Initialize the map if it is not present + concatenate_cloud_result.topic_to_transformed_cloud_map = + std::unordered_map(); + } + + concatenated_start_index = 0; + + for (const auto & [topic, cloud] : topic_to_cloud_map) { + const std::size_t num_points = cloud->height * cloud->width; + const std::size_t data_size = cloud->height * cloud->row_step; + + auto & concat_struct = cuda_concat_struct_map_[topic]; + + if (data_size > concat_struct.max_pointcloud_size || !concat_struct.cloud_ptr) { + concat_struct.max_pointcloud_size = (data_size + 1024) / 1024 * 1024; + concat_struct.cloud_ptr = std::make_unique(); + concat_struct.cloud_ptr->data = cuda_blackboard::make_unique(data_size); + } + // convert to original sensor frame if necessary + + auto & output_cloud = (*concatenate_cloud_result.topic_to_transformed_cloud_map)[topic]; + bool need_transform_to_sensor_frame = (cloud->header.frame_id != output_frame_); + + output_cloud = std::move(concat_struct.cloud_ptr); + + auto & stream = cuda_concat_struct_map_[topic].stream; + + if (keep_input_frame_in_synchronized_pointcloud_ && need_transform_to_sensor_frame) { + Eigen::Matrix4f transform; + managed_tf_buffer_->get_transform(cloud->header.frame_id, output_frame_, transform); + + TransformStruct transform_struct; + transform_struct.translation_x = transform(0, 3); + transform_struct.translation_y = transform(1, 3); + transform_struct.translation_z = transform(2, 3); + transform_struct.m11 = transform(0, 0); + transform_struct.m12 = transform(0, 1); + transform_struct.m13 = transform(0, 2); + transform_struct.m21 = transform(1, 0); + transform_struct.m22 = transform(1, 1); + transform_struct.m23 = transform(1, 2); + transform_struct.m31 = transform(2, 0); + transform_struct.m32 = transform(2, 1); + transform_struct.m33 = transform(2, 2); + + transform_launch( + output_points + concatenated_start_index, num_points, transform_struct, + reinterpret_cast(output_cloud->data.get()), stream); + output_cloud->header.frame_id = cloud->header.frame_id; + } else { + cudaMemcpyAsync( + output_cloud->data.get(), output_points + concatenated_start_index, data_size, + cudaMemcpyDeviceToDevice, stream); + output_cloud->header.frame_id = output_frame_; + } + + output_cloud->header.stamp = cloud->header.stamp; + output_cloud->width = cloud->width; + output_cloud->height = cloud->height; + output_cloud->point_step = sizeof(PointTypeStruct); + output_cloud->row_step = cloud->width * sizeof(PointTypeStruct); + output_cloud->fields = point_fields; + output_cloud->is_bigendian = false; + output_cloud->is_dense = true; + + concatenated_start_index += cloud->height * cloud->width; + } + } + + // Sync all streams + for (const auto & [topic, cuda_concat_struct] : cuda_concat_struct_map_) { + cudaStreamSynchronize(cuda_concat_struct.stream); + } + + concatenate_cloud_result.concatenate_cloud_ptr->header.stamp = oldest_stamp; + + return concatenate_cloud_result; +} + +} // namespace autoware::pointcloud_preprocessor + +template class autoware::pointcloud_preprocessor::CombineCloudHandler< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits>; diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.cu b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.cu new file mode 100644 index 0000000000000..88bd1094267d6 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.cu @@ -0,0 +1,55 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_combine_cloud_handler_kernel.hpp" + +#include + +namespace autoware::pointcloud_preprocessor +{ + +__global__ void transform_kernel( + const PointTypeStruct * input_points, int num_points, TransformStruct transform, + PointTypeStruct * output_points) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_points) { + float x = input_points[idx].x; + float y = input_points[idx].y; + float z = input_points[idx].z; + + output_points[idx].x = + transform.m11 * x + transform.m12 * y + transform.m13 * z + transform.translation_x; + output_points[idx].y = + transform.m21 * x + transform.m22 * y + transform.m23 * z + transform.translation_y; + output_points[idx].z = + transform.m31 * x + transform.m32 * y + transform.m33 * z + transform.translation_z; + output_points[idx].intensity = input_points[idx].intensity; + output_points[idx].return_type = input_points[idx].return_type; + output_points[idx].channel = input_points[idx].channel; + } +} + +void transform_launch( + const PointTypeStruct * input_points, int num_points, TransformStruct transform, + PointTypeStruct * output_points, cudaStream_t & stream) +{ + constexpr int threads_per_block = 256; + const int block_per_grid = (num_points + threads_per_block - 1) / threads_per_block; + + transform_kernel<<>>( + input_points, num_points, transform, output_points); +} + +} // namespace autoware::pointcloud_preprocessor diff --git a/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.cpp b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.cpp new file mode 100644 index 0000000000000..fd44f2b5e35c3 --- /dev/null +++ b/sensing/autoware_cuda_pointcloud_preprocessor/src/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.cpp @@ -0,0 +1,73 @@ +// Copyright 2025 TIER IV, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_concatenate_and_time_sync_node.hpp" + +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_cloud_collector.hpp" +#include "autoware/cuda_pointcloud_preprocessor/cuda_concatenate_data/cuda_traits.hpp" +#include "autoware/pointcloud_preprocessor/utility/memory.hpp" + +#include +#include +#include +#include + +namespace autoware::pointcloud_preprocessor +{ + +template <> +void PointCloudConcatenateDataSynchronizerComponentTemplated::initialize() +{ + concatenated_cloud_publisher_ = + std::make_shared>( + *this, "output"); + + for (auto & topic : params_.input_topics) { + std::string new_topic = + replace_sync_topic_name_postfix(topic, params_.synchronized_pointcloud_postfix); + auto publisher = + std::make_shared>( + *this, new_topic); + topic_to_transformed_cloud_publisher_map_.insert({topic, publisher}); + } + + for (const std::string & topic : params_.input_topics) { + auto callback = [&](const cuda_blackboard::CudaPointCloud2::ConstSharedPtr msg) { + this->cloud_callback(msg, topic); + }; + + auto pointcloud_sub = + std::make_shared>( + *this, topic, false, callback); + pointcloud_subs_.push_back(pointcloud_sub); + } + + RCLCPP_DEBUG_STREAM( + get_logger(), + "Subscribing to " << params_.input_topics.size() << " user given topics as inputs:"); + + for (const auto & input_topic : params_.input_topics) { + RCLCPP_DEBUG_STREAM(get_logger(), " - " << input_topic); + } +} + +} // namespace autoware::pointcloud_preprocessor + +template class autoware::pointcloud_preprocessor:: + PointCloudConcatenateDataSynchronizerComponentTemplated< + autoware::pointcloud_preprocessor::CudaPointCloud2Traits>; + +#include +RCLCPP_COMPONENTS_REGISTER_NODE( + autoware::cuda_pointcloud_preprocessor::CudaPointCloudConcatenateDataSynchronizerComponent)