tier4
diff --git a/‎perception/lidar_transfusion/CMakeLists.txt
-52 b/‎perception/lidar_transfusion/CMakeLists.txt
-52
diff --git a/‎perception/lidar_transfusion/README.md
-18 b/‎perception/lidar_transfusion/README.md
-18
diff --git a/‎perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
+1-1 b/‎perception/lidar_transfusion/include/lidar_transfusion/preprocess/pointcloud_densification.hpp
+1-1
diff --git a/‎perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
+5-1 b/‎perception/lidar_transfusion/include/lidar_transfusion/preprocess/preprocess_kernel.hpp
+5-1
diff --git a/‎perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
+3-5 b/‎perception/lidar_transfusion/include/lidar_transfusion/preprocess/voxel_generator.hpp
+3-5
diff --git a/‎perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
-68 b/‎perception/lidar_transfusion/include/lidar_transfusion/ros_utils.hpp
-68
diff --git a/‎perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
+46 b/‎perception/lidar_transfusion/include/lidar_transfusion/utils.hpp
+46
diff --git a/‎perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
+5-4 b/‎perception/lidar_transfusion/lib/preprocess/pointcloud_densification.cpp
+5-4
diff --git a/‎perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
+24-37 b/‎perception/lidar_transfusion/lib/preprocess/preprocess_kernel.cu
+24-37
@@ -137,58 +137,6 @@ if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
   if(BUILD_TESTING)
     find_package(ament_lint_auto REQUIRED)
     ament_lint_auto_find_test_dependencies()
-
-    find_package(ament_cmake_gtest REQUIRED)
-    ament_auto_add_gtest(test_detection_class_remapper
-      test/test_detection_class_remapper.cpp
-    )
-    ament_auto_add_gtest(test_ros_utils
-      test/test_ros_utils.cpp
-    )
-    ament_auto_add_gtest(test_nms
-      test/test_nms.cpp
-    )
-
-    # Temporary disabled, tracked by:
-    # https://github.com/autowarefoundation/autoware.universe/issues/7724
-    # ament_auto_add_gtest(test_voxel_generator
-    #   test/test_voxel_generator.cpp
-    # )
-
-    # # preprocess kernel test
-    # add_executable(test_preprocess_kernel
-    #   test/test_preprocess_kernel.cpp
-    # )
-    # target_include_directories(test_preprocess_kernel PUBLIC
-    #   ${test_preprocess_kernel_SOURCE_DIR}
-    # )
-    # target_link_libraries(test_preprocess_kernel
-    #   transfusion_cuda_lib
-    #   gtest
-    #   gtest_main
-    # )
-    # ament_add_test(test_preprocess_kernel
-    #   GENERATE_RESULT_FOR_RETURN_CODE_ZERO
-    #   COMMAND "$<TARGET_FILE:test_preprocess_kernel>"
-    # )
-
-    # # postprocess kernel test
-    # add_executable(test_postprocess_kernel
-    #   test/test_postprocess_kernel.cpp
-    # )
-    # target_include_directories(test_postprocess_kernel PUBLIC
-    #   ${test_postprocess_kernel_SOURCE_DIR}
-    # )
-    # target_link_libraries(test_postprocess_kernel
-    #   transfusion_cuda_lib
-    #   gtest
-    #   gtest_main
-    # )
-    # ament_add_test(test_postprocess_kernel
-    #   GENERATE_RESULT_FOR_RETURN_CODE_ZERO
-    #   COMMAND "$<TARGET_FILE:test_postprocess_kernel>"
-    # )
-
   endif()
 
   ament_auto_package(
 
@@ -59,24 +59,6 @@ ros2 launch lidar_transfusion lidar_transfusion.launch.xml log_level:=debug
 
 ## Assumptions / Known limits
 
-This library operates on raw cloud data (bytes). It is assumed that the input pointcloud message has following format:
-
-```python
-[
-  sensor_msgs.msg.PointField(name='x', offset=0, datatype=7, count=1),
-  sensor_msgs.msg.PointField(name='y', offset=4, datatype=7, count=1),
-  sensor_msgs.msg.PointField(name='z', offset=8, datatype=7, count=1),
-  sensor_msgs.msg.PointField(name='intensity', offset=12, datatype=2, count=1)
-]
-```
-
-This input may consist of other fields as well - shown format is required minimum.
-For debug purposes, you can validate your pointcloud topic using simple command:
-
-```bash
-ros2 topic echo <input_topic> --field fields
-```
-
 ## Trained Models
 
 You can download the onnx format of trained models by clicking on the links below.
 
@@ -51,7 +51,7 @@ class DensificationParam
 
 struct PointCloudWithTransform
 {
-  cuda::unique_ptr<uint8_t[]> data_d{nullptr};
+  cuda::unique_ptr<float[]> points_d{nullptr};
   std_msgs::msg::Header header;
   size_t num_points{0};
   Eigen::Affine3f affine_past2world;
 
@@ -56,8 +56,12 @@ class PreprocessCuda
     unsigned int * mask, float * voxels, unsigned int * pillar_num, float * voxel_features,
     unsigned int * voxel_num, unsigned int * voxel_idxs);
 
+  // cudaError_t generateVoxelsInput_launch(
+  //   uint8_t * cloud_data, CloudInfo & cloud_info, unsigned int points_agg, unsigned int
+  //   points_size, float time_lag, float * affine_transform, float * points);
+
   cudaError_t generateSweepPoints_launch(
-    const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
+    const float * input_points, size_t points_size, int input_point_step, float time_lag,
     const float * transform, float * output_points);
 
 private:
 
@@ -18,17 +18,15 @@
 #include "lidar_transfusion/cuda_utils.hpp"
 #include "lidar_transfusion/preprocess/pointcloud_densification.hpp"
 #include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
-#include "lidar_transfusion/ros_utils.hpp"
 #include "lidar_transfusion/transfusion_config.hpp"
+#include "lidar_transfusion/utils.hpp"
 
 #ifdef ROS_DISTRO_GALACTIC
 #include <tf2_eigen/tf2_eigen.h>
 #else
 #include <tf2_eigen/tf2_eigen.hpp>
 #endif
 
-#include <autoware_point_types/types.hpp>
-
 #include <sensor_msgs/msg/point_cloud2.hpp>
 
 #include <memory>
@@ -38,8 +36,8 @@
 
 namespace lidar_transfusion
 {
-constexpr size_t AFF_MAT_SIZE = 16;  // 4x4 matrix
-constexpr size_t MAX_CLOUD_STEP_SIZE = sizeof(autoware_point_types::PointXYZIRCAEDT);
+constexpr size_t AFF_MAT_SIZE = 16;         // 4x4 matrix
+constexpr size_t MAX_CLOUD_STEP_SIZE = 35;  // PointXYZIRCADT
 
 class VoxelGenerator
 {
 
@@ -17,84 +17,16 @@
 
 #include "lidar_transfusion/utils.hpp"
 
-#include <autoware_point_types/types.hpp>
-
 #include <autoware_perception_msgs/msg/detected_object_kinematics.hpp>
 #include <autoware_perception_msgs/msg/detected_objects.hpp>
 #include <autoware_perception_msgs/msg/object_classification.hpp>
 #include <autoware_perception_msgs/msg/shape.hpp>
-#include <sensor_msgs/msg/point_field.hpp>
 
-#include <cstdint>
 #include <string>
 #include <vector>
 
-#define CHECK_OFFSET(offset, structure, field) \
-  static_assert(                               \
-    offsetof(structure, field) == offset,      \
-    "Offset of " #field " in " #structure " does not match expected offset.")
-#define CHECK_TYPE(type, structure, field)                 \
-  static_assert(                                           \
-    std::is_same<decltype(structure::field), type>::value, \
-    "Type of " #field " in " #structure " does not match expected type.")
-#define CHECK_FIELD(offset, type, structure, field) \
-  CHECK_OFFSET(offset, structure, field);           \
-  CHECK_TYPE(type, structure, field)
-
 namespace lidar_transfusion
 {
-using sensor_msgs::msg::PointField;
-
-CHECK_FIELD(0, float, autoware_point_types::PointXYZIRCAEDT, x);
-CHECK_FIELD(4, float, autoware_point_types::PointXYZIRCAEDT, y);
-CHECK_FIELD(8, float, autoware_point_types::PointXYZIRCAEDT, z);
-CHECK_FIELD(12, uint8_t, autoware_point_types::PointXYZIRCAEDT, intensity);
-
-struct CloudInfo
-{
-  uint32_t x_offset{0};
-  uint32_t y_offset{sizeof(float)};
-  uint32_t z_offset{sizeof(float) * 2};
-  uint32_t intensity_offset{sizeof(float) * 3};
-  uint8_t x_datatype{PointField::FLOAT32};
-  uint8_t y_datatype{PointField::FLOAT32};
-  uint8_t z_datatype{PointField::FLOAT32};
-  uint8_t intensity_datatype{PointField::UINT8};
-  uint8_t x_count{1};
-  uint8_t y_count{1};
-  uint8_t z_count{1};
-  uint8_t intensity_count{1};
-  uint32_t point_step{sizeof(autoware_point_types::PointXYZIRCAEDT)};
-  bool is_bigendian{false};
-
-  bool operator!=(const CloudInfo & rhs) const
-  {
-    return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
-           intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
-           y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
-           intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
-           y_count != rhs.y_count || z_count != rhs.z_count ||
-           intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
-  }
-
-  friend std::ostream & operator<<(std::ostream & os, const CloudInfo & info)
-  {
-    os << "x_offset: " << static_cast<int>(info.x_offset) << std::endl;
-    os << "y_offset: " << static_cast<int>(info.y_offset) << std::endl;
-    os << "z_offset: " << static_cast<int>(info.z_offset) << std::endl;
-    os << "intensity_offset: " << static_cast<int>(info.intensity_offset) << std::endl;
-    os << "x_datatype: " << static_cast<int>(info.x_datatype) << std::endl;
-    os << "y_datatype: " << static_cast<int>(info.y_datatype) << std::endl;
-    os << "z_datatype: " << static_cast<int>(info.z_datatype) << std::endl;
-    os << "intensity_datatype: " << static_cast<int>(info.intensity_datatype) << std::endl;
-    os << "x_count: " << static_cast<int>(info.x_count) << std::endl;
-    os << "y_count: " << static_cast<int>(info.y_count) << std::endl;
-    os << "z_count: " << static_cast<int>(info.z_count) << std::endl;
-    os << "intensity_count: " << static_cast<int>(info.intensity_count) << std::endl;
-    os << "is_bigendian: " << static_cast<int>(info.is_bigendian) << std::endl;
-    return os;
-  }
-};
 
 void box3DToDetectedObject(
   const Box3D & box3d, const std::vector<std::string> & class_names,
 
@@ -36,6 +36,52 @@ struct Box3D
   float yaw;
 };
 
+struct CloudInfo
+{
+  uint32_t x_offset;
+  uint32_t y_offset;
+  uint32_t z_offset;
+  uint32_t intensity_offset;
+  uint8_t x_datatype;
+  uint8_t y_datatype;
+  uint8_t z_datatype;
+  uint8_t intensity_datatype;
+  uint8_t x_count;
+  uint8_t y_count;
+  uint8_t z_count;
+  uint8_t intensity_count;
+  uint32_t point_step;
+  bool is_bigendian;
+
+  CloudInfo()
+  : x_offset(0),
+    y_offset(4),
+    z_offset(8),
+    intensity_offset(12),
+    x_datatype(7),
+    y_datatype(7),
+    z_datatype(7),
+    intensity_datatype(7),
+    x_count(1),
+    y_count(1),
+    z_count(1),
+    intensity_count(1),
+    point_step(16),
+    is_bigendian(false)
+  {
+  }
+
+  bool operator!=(const CloudInfo & rhs) const
+  {
+    return x_offset != rhs.x_offset || y_offset != rhs.y_offset || z_offset != rhs.z_offset ||
+           intensity_offset != rhs.intensity_offset || x_datatype != rhs.x_datatype ||
+           y_datatype != rhs.y_datatype || z_datatype != rhs.z_datatype ||
+           intensity_datatype != rhs.intensity_datatype || x_count != rhs.x_count ||
+           y_count != rhs.y_count || z_count != rhs.z_count ||
+           intensity_count != rhs.intensity_count || is_bigendian != rhs.is_bigendian;
+  }
+};
+
 enum NetworkIO { voxels = 0, num_points, coors, cls_score, dir_pred, bbox_pred, ENUM_SIZE };
 
 // cspell: ignore divup
 
@@ -93,15 +93,16 @@ void PointCloudDensification::enqueue(
   affine_world2current_ = affine_world2current;
   current_timestamp_ = rclcpp::Time(msg.header.stamp).seconds();
 
-  auto data_d = cuda::make_unique<uint8_t[]>(
-    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(uint8_t));
+  assert(sizeof(uint8_t) * msg.width * msg.height * msg.point_step % sizeof(float) == 1);
+  auto points_d = cuda::make_unique<float[]>(
+    sizeof(uint8_t) * msg.width * msg.height * msg.point_step / sizeof(float));
 
   CHECK_CUDA_ERROR(cudaMemcpyAsync(
-    data_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
+    points_d.get(), msg.data.data(), sizeof(uint8_t) * msg.width * msg.height * msg.point_step,
     cudaMemcpyHostToDevice, stream_));
 
   PointCloudWithTransform pointcloud = {
-    std::move(data_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
+    std::move(points_d), msg.header, msg.width * msg.height, affine_world2current.inverse()};
 
   pointcloud_cache_.push_front(std::move(pointcloud));
 }
 
@@ -31,8 +31,6 @@
 #include "lidar_transfusion/cuda_utils.hpp"
 #include "lidar_transfusion/preprocess/preprocess_kernel.hpp"
 
-#include <cstdint>
-
 namespace lidar_transfusion
 {
 
@@ -101,12 +99,9 @@ __global__ void generateVoxels_random_kernel(
 cudaError_t PreprocessCuda::generateVoxels_random_launch(
   float * points, unsigned int points_size, unsigned int * mask, float * voxels)
 {
-  if (points_size == 0) {
-    return cudaGetLastError();
-  }
-  dim3 blocks(divup(points_size, config_.threads_for_voxel_));
-  dim3 threads(config_.threads_for_voxel_);
-
+  int threadNum = config_.threads_for_voxel_;
+  dim3 blocks((points_size + threadNum - 1) / threadNum);
+  dim3 threads(threadNum);
   generateVoxels_random_kernel<<<blocks, threads, 0, stream_>>>(
     points, points_size, config_.min_x_range_, config_.max_x_range_, config_.min_y_range_,
     config_.max_y_range_, config_.min_z_range_, config_.max_z_range_, config_.voxel_x_size_,
@@ -170,48 +165,40 @@ cudaError_t PreprocessCuda::generateBaseFeatures_launch(
 }
 
 __global__ void generateSweepPoints_kernel(
-  const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
+  const float * input_points, size_t points_size, int input_point_step, float time_lag,
   const float * transform_array, int num_features, float * output_points)
 {
   int point_idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (point_idx >= points_size) return;
 
-  union {
-    uint32_t raw{0};
-    float value;
-  } input_x, input_y, input_z;
-
-#pragma unroll
-  for (int i = 0; i < 4; i++) {  // 4 bytes for float32
-    input_x.raw |= input_data[point_idx * input_point_step + i] << i * 8;
-    input_y.raw |= input_data[point_idx * input_point_step + i + 4] << i * 8;
-    input_z.raw |= input_data[point_idx * input_point_step + i + 8] << i * 8;
-  }
-
-  float input_intensity = static_cast<float>(input_data[point_idx * input_point_step + 12]);
-
-  output_points[point_idx * num_features] =
-    transform_array[0] * input_x.value + transform_array[4] * input_y.value +
-    transform_array[8] * input_z.value + transform_array[12];
-  output_points[point_idx * num_features + 1] =
-    transform_array[1] * input_x.value + transform_array[5] * input_y.value +
-    transform_array[9] * input_z.value + transform_array[13];
-  output_points[point_idx * num_features + 2] =
-    transform_array[2] * input_x.value + transform_array[6] * input_y.value +
-    transform_array[10] * input_z.value + transform_array[14];
-  output_points[point_idx * num_features + 3] = input_intensity;
+  const float input_x = input_points[point_idx * input_point_step + 0];
+  const float input_y = input_points[point_idx * input_point_step + 1];
+  const float input_z = input_points[point_idx * input_point_step + 2];
+  const float intensity = input_points[point_idx * input_point_step + 3];
+
+  output_points[point_idx * num_features] = transform_array[0] * input_x +
+                                            transform_array[4] * input_y +
+                                            transform_array[8] * input_z + transform_array[12];
+  output_points[point_idx * num_features + 1] = transform_array[1] * input_x +
+                                                transform_array[5] * input_y +
+                                                transform_array[9] * input_z + transform_array[13];
+  output_points[point_idx * num_features + 2] = transform_array[2] * input_x +
+                                                transform_array[6] * input_y +
+                                                transform_array[10] * input_z + transform_array[14];
+  output_points[point_idx * num_features + 3] = intensity;
   output_points[point_idx * num_features + 4] = time_lag;
 }
 
 cudaError_t PreprocessCuda::generateSweepPoints_launch(
-  const uint8_t * input_data, size_t points_size, int input_point_step, float time_lag,
+  const float * input_points, size_t points_size, int input_point_step, float time_lag,
   const float * transform_array, float * output_points)
 {
-  dim3 blocks(divup(points_size, config_.threads_for_voxel_));
-  dim3 threads(config_.threads_for_voxel_);
+  int threadNum = config_.threads_for_voxel_;
+  dim3 blocks((points_size + threadNum - 1) / threadNum);
+  dim3 threads(threadNum);
 
   generateSweepPoints_kernel<<<blocks, threads, 0, stream_>>>(
-    input_data, points_size, input_point_step, time_lag, transform_array,
+    input_points, points_size, input_point_step, time_lag, transform_array,
     config_.num_point_feature_size_, output_points);
 
   cudaError_t err = cudaGetLastError();
Original file line number	Diff line number	Diff line change
`@@ -51,7 +51,7 @@ class DensificationParam`
`51`	`51`
`52`	`52`	`struct PointCloudWithTransform`
`53`	`53`	`{`
`54`		`- cuda::unique_ptr<uint8_t[]> data_d{nullptr};`
	`54`	`+ cuda::unique_ptr<float[]> points_d{nullptr};`
`55`	`55`	`std_msgs::msg::Header header;`
`56`	`56`	`size_t num_points{0};`
`57`	`57`	`Eigen::Affine3f affine_past2world;`