tier4
diff --git a/‎perception/lidar_centerpoint/CMakeLists.txt
+43-29 b/‎perception/lidar_centerpoint/CMakeLists.txt
+43-29
diff --git a/‎perception/lidar_centerpoint/README.md
+5-26 b/‎perception/lidar_centerpoint/README.md
+5-26
diff --git a/‎perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp
+3-14 b/‎perception/lidar_centerpoint/include/lidar_centerpoint/node.hpp
+3-14
diff --git a/‎perception/lidar_centerpoint/launch/lidar_centerpoint.launch.xml
+2-2 b/‎perception/lidar_centerpoint/launch/lidar_centerpoint.launch.xml
+2-2
diff --git a/‎perception/lidar_centerpoint/lib/include/centerpoint_trt.hpp
+44-48 b/‎perception/lidar_centerpoint/lib/include/centerpoint_trt.hpp
+44-48
@@ -52,23 +52,27 @@ else()
   set(TRT_AVAIL OFF)
 endif()
 
-option(TORCH_AVAIL "Torch available" OFF)
-if(CUDA_FOUND)
-  set(Torch_DIR /usr/local/libtorch/share/cmake/Torch)
-  find_package(Torch)
-  if(TORCH_FOUND)
-    if(CUDA_VERBOSE)
-      message(STATUS "TORCH_INCLUDE_DIRS: ${TORCH_INCLUDE_DIRS}")
-      message(STATUS "TORCH_LIBRARIES: ${TORCH_LIBRARIES}")
-    endif()
-    set(TORCH_AVAIL ON)
-  else()
-  message("Torch NOT FOUND")
-  set(TORCH_AVAIL OFF)
+# set flags for CUDNN availability
+option(CUDNN_AVAIL "CUDNN available" OFF)
+# try to find the CUDNN module
+find_library(CUDNN_LIBRARY
+NAMES libcudnn.so${__cudnn_ver_suffix} libcudnn${__cudnn_ver_suffix}.dylib ${__cudnn_lib_win_name}
+PATHS $ENV{LD_LIBRARY_PATH} ${__libpath_cudart} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
+PATH_SUFFIXES lib lib64 bin
+DOC "CUDNN library."
+)
+if(CUDNN_LIBRARY)
+  if(CUDA_VERBOSE)
+    message(STATUS "CUDNN is available!")
+    message(STATUS "CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
   endif()
+  set(CUDNN_AVAIL ON)
+else()
+  message("CUDNN is NOT Available")
+  set(CUDNN_AVAIL OFF)
 endif()
 
-if(TRT_AVAIL AND CUDA_AVAIL AND TORCH_AVAIL)
+if(TRT_AVAIL AND CUDA_AVAIL AND CUDNN_AVAIL)
 # Download trained models
   find_program(GDOWN_AVAIL "gdown")
   if(NOT GDOWN_AVAIL)
@@ -84,53 +88,63 @@ if(TRT_AVAIL AND CUDA_AVAIL AND TORCH_AVAIL)
     set(FILE_PATH ${DATA_PATH}/${FILE_NAME})
     if(EXISTS ${FILE_PATH})
       file(MD5 ${FILE_PATH} EXISTING_FILE_HASH)
-      if(NOT ${FILE_HASH} EQUAL ${EXISTING_FILE_HASH})
-        message(STATUS "... file hash changes. Downloading now ...")
+      if(${FILE_HASH} EQUAL ${EXISTING_FILE_HASH})
+        message(STATUS "File already exists.")
+      else()
+        message(STATUS "File hash changes. Downloading now ...")
         execute_process(COMMAND gdown --quiet https://drive.google.com//uc?id=${GFILE_ID} -O ${FILE_PATH})
+        # file(MD5 ${FILE_PATH} DOWNLOADED_FILE_HASH)  # disable to pass ci
+        message(STATUS "Downloaded file hash: ${DOWNLOADED_FILE_HASH}")
       endif()
     else()
-      message(STATUS "... file doesn't exists. Downloading now ...")
+      message(STATUS "File doesn't exists. Downloading now ...")
       execute_process(COMMAND gdown --quiet https://drive.google.com//uc?id=${GFILE_ID} -O ${FILE_PATH})
+      # file(MD5 ${FILE_PATH} DOWNLOADED_FILE_HASH)  # disable to pass ci
+      message(STATUS "Downloaded file hash: ${DOWNLOADED_FILE_HASH}")
     endif()
   endfunction()
 
   # default model
-  download(pts_voxel_encoder_default.onnx 1_8OCQmrPm_R4ZVh70QsS9HZo6uGrlbgz 01b860612e497591c4375d90dff61ef7)
-  download(pts_voxel_encoder_default.pt 1RZ7cuDnI-RBrDiWe-2vEs16mR_z0e9Uo 33136caa97e3bcef2cf3e04bbc93d1e4)
-  download(pts_backbone_neck_head_default.onnx 1UxDyt8T-TMJS7Ujx-1vbbqGRfDbMUZg2 e23a8ad4ea440f923e44dbe072b070da)
-  download(pts_backbone_neck_head_default.pt 1toAhmOriX8bwVI-ohuas9_2EBZnltoXh eb0df29b30acf9c1082ac4490af0bbc5)
+  download(pts_voxel_encoder_default.onnx 1KFhmA4oFT6CtZx5806QeMzn5H2tKa3oD 410f730c537968cb27fbd70c941849a8)
+  download(pts_backbone_neck_head_default.onnx 1iyk5VoQ4uNBGPZwypVZIMjSuSYAI1RxP e97c165c7877222c0e27e44409a07517)
 
   # aip_x2 model
-  download(pts_voxel_encoder_aip_x2.onnx 1x-NAHQ3W0lbLmjJlrL6Nhvdq8yz6Ux0n 65eeb95c5e48ebfe6894146cdb48c160)
-  download(pts_voxel_encoder_aip_x2.pt 1jzKopAhXWjnEgo_v8rtYy0hQIayUE-oL 4db81ce8edc6571aa0afb1ae43ee72e9)
-  download(pts_backbone_neck_head_aip_x2.onnx 1l2fdIQcBWr3-6stVoNkudnL4OZaPqmNT a33c8910fd9c9c910b10904d3cd96717)
-  download(pts_backbone_neck_head_aip_x2.pt 18iOAlRsjvcWoUG9KiL1PlD7OY5mi9BSw 274fdf1580dd899e36c050c1366f1883)
+  download(pts_voxel_encoder_aip_x2.onnx 13aYPRHx17Ge4BqxzW9drAUSWTppjtUV5 3ae5e9efd7b2ed12115e6f0b28cac58d)
+  download(pts_backbone_neck_head_aip_x2.onnx 14PJ_L3Jpz6Yi8GzoctVOEbGWcaCLArGp 6a406a19e05660677c162486ab332de8)
 
   find_package(ament_cmake_auto REQUIRED)
   ament_auto_find_build_dependencies()
 
   include_directories(
     lib/include
     ${CUDA_INCLUDE_DIRS}
-    ${TORCH_INCLUDE_DIRS}
   )
 
   ### centerpoint ###
   ament_auto_add_library(centerpoint SHARED
+    lib/src/centerpoint_trt.cpp
     lib/src/pointcloud_densification.cpp
     lib/src/voxel_generator.cpp
-    lib/src/centerpoint_trt.cpp
     lib/src/tensorrt_wrapper.cpp
     lib/src/network_trt.cpp
+    lib/src/utils.cpp
+  )
+
+  cuda_add_library(centerpoint_cuda_libraries SHARED
+    lib/src/circle_nms_kernel.cu
+    lib/src/postprocess_kernel.cu
+    lib/src/preprocess_kernel.cu
+    lib/src/scatter_kernel.cu
   )
 
   target_link_libraries(centerpoint
     ${NVINFER}
     ${NVONNXPARSER}
-    ${NVINFER_PLUGIN}
     ${CUDA_LIBRARIES}
     ${CUBLAS_LIBRARIES}
-    ${TORCH_LIBRARIES}
+    ${CUDA_curand_LIBRARY}
+    ${CUDNN_LIBRARY}
+    centerpoint_cuda_libraries
   )
 
   ## node ##
 
@@ -20,10 +20,9 @@ We trained the models using <https://github.com/open-mmlab/mmdetection3d>.
 
 ### Output
 
-| Name                               | Type                                                  | Description              |
-| ---------------------------------- | ----------------------------------------------------- | ------------------------ |
-| `~/output/objects`                 | `autoware_auto_perception_msgs::msg::DetectedObjects` | detected objects         |
-| `~/debug/pointcloud_densification` | `sensor_msgs::msg::PointCloud2`                       | densification pointcloud |
+| Name               | Type                                                  | Description      |
+| ------------------ | ----------------------------------------------------- | ---------------- |
+| `~/output/objects` | `autoware_auto_perception_msgs::msg::DetectedObjects` | detected objects |
 
 ## Parameters
 
@@ -34,30 +33,16 @@ We trained the models using <https://github.com/open-mmlab/mmdetection3d>.
 | `score_threshold`               | float  | `0.4`         | detected objects with score less than threshold are ignored |
 | `densification_world_frame_id`  | string | `map`         | the world frame id to fuse multi-frame pointcloud           |
 | `densification_num_past_frames` | int    | `1`           | the number of past frames to fuse with the current frame    |
-| `use_encoder_trt`               | bool   | `false`       | use TensorRT VoxelFeatureEncoder                            |
-| `use_head_trt`                  | bool   | `true`        | use TensorRT DetectionHead                                  |
 | `trt_precision`                 | string | `fp16`        | TensorRT inference precision: `fp32` or `fp16`              |
 | `encoder_onnx_path`             | string | `""`          | path to VoxelFeatureEncoder ONNX file                       |
 | `encoder_engine_path`           | string | `""`          | path to VoxelFeatureEncoder TensorRT Engine file            |
-| `encoder_pt_path`               | string | `""`          | path to VoxelFeatureEncoder TorchScript file                |
 | `head_onnx_path`                | string | `""`          | path to DetectionHead ONNX file                             |
 | `head_engine_path`              | string | `""`          | path to DetectionHead TensorRT Engine file                  |
-| `head_pt_path`                  | string | `""`          | path to DetectionHead TorchScript file                      |
 
 ## Assumptions / Known limits
 
 - The `object.existence_probability` is stored the value of classification confidence of a DNN, not probability.
 
-- If you have an error like `'GOMP_4.5' not found`, replace the OpenMP library in libtorch.
-
-  ```bash
-  sudo apt install libgomp1 -y
-  sudo rm /usr/local/libtorch/lib/libgomp-75eea7e8.so.1
-  sudo ln -s /usr/lib/x86_64-linux-gnu/libgomp.so.1 /usr/local/libtorch/lib/libgomp-75eea7e8.so.1
-  ```
-
-- if `use_encoder_trt` is set `use_encoder_trt`, more GPU memory is allocated.
-
 ## (Optional) Error detection and handling
 
 <!-- Write how to detect errors and how to recover from them.
@@ -92,15 +77,9 @@ Example:
 
 [5] <https://github.com/open-mmlab/OpenPCDet>
 
-[6] <https://github.com/poodarchu/Det3D>
-
-[7] <https://github.com/xingyizhou/CenterNet>
-
-[8] <https://github.com/lzccccc/SMOKE>
-
-[9] <https://github.com/yukkysaito/autoware_perception>
+[6] <https://github.com/yukkysaito/autoware_perception>
 
-[10] <https://github.com/pytorch/pytorch>
+[7] <https://github.com/NVIDIA-AI-IOT/CUDA-PointPillars>
 
 ## (Optional) Future extensions / Unimplemented parts
 
 
@@ -1,4 +1,4 @@
-// Copyright 2021 Tier IV, Inc.
+// Copyright 2021 TIER IV, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -40,7 +40,8 @@ class LidarCenterPointNode : public rclcpp::Node
 
 private:
   void pointCloudCallback(const sensor_msgs::msg::PointCloud2::ConstSharedPtr input_pointcloud_msg);
-
+  void box3DToDetectedObject(
+    const Box3D & box3d, autoware_auto_perception_msgs::msg::DetectedObject & obj);
   static uint8_t getSemanticType(const std::string & class_name);
   static bool isCarLikeVehicleLabel(const uint8_t label);
 
@@ -49,20 +50,8 @@ class LidarCenterPointNode : public rclcpp::Node
 
   rclcpp::Subscription<sensor_msgs::msg::PointCloud2>::SharedPtr pointcloud_sub_;
   rclcpp::Publisher<autoware_auto_perception_msgs::msg::DetectedObjects>::SharedPtr objects_pub_;
-  rclcpp::Publisher<sensor_msgs::msg::PointCloud2>::SharedPtr pointcloud_pub_;
 
   float score_threshold_{0.0};
-  bool use_encoder_trt_{false};
-  bool use_head_trt_{false};
-  std::string trt_precision_;
-
-  std::string encoder_onnx_path_;
-  std::string encoder_engine_path_;
-  std::string encoder_pt_path_;
-  std::string head_onnx_path_;
-  std::string head_engine_path_;
-  std::string head_pt_path_;
-
   std::vector<std::string> class_names_;
   bool rename_car_to_truck_and_bus_{false};
 
 
@@ -13,9 +13,9 @@
       <param name="score_threshold" value="0.45"/>
       <param name="densification_world_frame_id" value="map"/>
       <param name="densification_num_past_frames" value="1"/>
-      <param name="use_head_trt" value="true"/>
       <param name="trt_precision" value="fp16"/>
-      <param name="encoder_pt_path" value="$(var model_path)/pts_voxel_encoder_$(var model_name).pt"/>
+      <param name="encoder_onnx_path" value="$(var model_path)/pts_voxel_encoder_$(var model_name).onnx"/>
+      <param name="encoder_engine_path" value="$(var model_path)/pts_voxel_encoder_$(var model_name).engine"/>
       <param name="head_onnx_path" value="$(var model_path)/pts_backbone_neck_head_$(var model_name).onnx"/>
       <param name="head_engine_path" value="$(var model_path)/pts_backbone_neck_head_$(var model_name).engine"/>
       <param from="$(var model_param_path)"/>
 
@@ -1,4 +1,4 @@
-// Copyright 2021 Tier IV, Inc.
+// Copyright 2021 TIER IV, Inc.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -18,14 +18,13 @@
 #include <config.hpp>
 #include <cuda_utils.hpp>
 #include <network_trt.hpp>
-#include <tier4_autoware_utils/math/constants.hpp>
+#include <postprocess_kernel.hpp>
 #include <voxel_generator.hpp>
 
 #include <sensor_msgs/msg/point_cloud2.hpp>
 
 #include <pcl/point_cloud.h>
 #include <pcl/point_types.h>
-#include <torch/script.h>
 
 #include <memory>
 #include <string>
@@ -37,75 +36,72 @@ namespace centerpoint
 class NetworkParam
 {
 public:
-  NetworkParam(
-    std::string onnx_path, std::string engine_path, std::string pt_path, std::string trt_precision,
-    const bool use_trt)
+  NetworkParam(std::string onnx_path, std::string engine_path, std::string trt_precision)
   : onnx_path_(std::move(onnx_path)),
     engine_path_(std::move(engine_path)),
-    pt_path_(std::move(pt_path)),
-    trt_precision_(std::move(trt_precision)),
-    use_trt_(use_trt)
+    trt_precision_(std::move(trt_precision))
   {
   }
 
   std::string onnx_path() const { return onnx_path_; }
   std::string engine_path() const { return engine_path_; }
-  std::string pt_path() const { return pt_path_; }
   std::string trt_precision() const { return trt_precision_; }
-  bool use_trt() const { return use_trt_; }
 
 private:
   std::string onnx_path_;
   std::string engine_path_;
-  std::string pt_path_;
   std::string trt_precision_;
-  bool use_trt_;
 };
 
 class CenterPointTRT
 {
 public:
   explicit CenterPointTRT(
-    const int num_class, const NetworkParam & encoder_param, const NetworkParam & head_param,
-    const DensificationParam & densification_param);
+    const std::size_t num_class, const float score_threshold, const NetworkParam & encoder_param,
+    const NetworkParam & head_param, const DensificationParam & densification_param);
 
   ~CenterPointTRT();
 
-  std::vector<float> detect(
-    const sensor_msgs::msg::PointCloud2 &, const tf2_ros::Buffer & tf_buffer);
+  bool detect(
+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer,
+    std::vector<Box3D> & det_boxes3d);
 
 private:
-  bool initPtr(bool use_encoder_trt, bool use_head_trt);
-
-  bool loadTorchScript(torch::jit::script::Module & module, const std::string & model_path);
-
-  static at::Tensor createInputFeatures(
-    const at::Tensor & voxels, const at::Tensor & coords, const at::Tensor & voxel_num_points);
-
-  static at::Tensor scatterPillarFeatures(
-    const at::Tensor & pillar_features, const at::Tensor & coordinates);
-
-  at::Tensor generatePredictedBoxes();
-
-  std::unique_ptr<VoxelGeneratorTemplate> vg_ptr_ = nullptr;
-  torch::jit::script::Module encoder_pt_;
-  torch::jit::script::Module head_pt_;
-  std::unique_ptr<VoxelEncoderTRT> encoder_trt_ptr_ = nullptr;
-  std::unique_ptr<HeadTRT> head_trt_ptr_ = nullptr;
-  c10::Device device_ = torch::kCUDA;
-  cudaStream_t stream_ = nullptr;
-
-  int num_class_{0};
-  at::Tensor voxels_t_;
-  at::Tensor coordinates_t_;
-  at::Tensor num_points_per_voxel_t_;
-  at::Tensor output_pillar_feature_t_;
-  at::Tensor output_heatmap_t_;
-  at::Tensor output_offset_t_;
-  at::Tensor output_z_t_;
-  at::Tensor output_dim_t_;
-  at::Tensor output_rot_t_;
-  at::Tensor output_vel_t_;
+  void initPtr();
+
+  bool preprocess(
+    const sensor_msgs::msg::PointCloud2 & input_pointcloud_msg, const tf2_ros::Buffer & tf_buffer);
+
+  void inference();
+
+  void postProcess(std::vector<Box3D> & det_boxes3d);
+
+  std::unique_ptr<VoxelGeneratorTemplate> vg_ptr_{nullptr};
+  std::unique_ptr<VoxelEncoderTRT> encoder_trt_ptr_{nullptr};
+  std::unique_ptr<HeadTRT> head_trt_ptr_{nullptr};
+  std::unique_ptr<PostProcessCUDA> post_proc_ptr_{nullptr};
+  cudaStream_t stream_{nullptr};
+
+  bool verbose_{false};
+  std::size_t num_class_{0};
+  std::size_t num_voxels_{0};
+  std::size_t encoder_in_feature_size_{0};
+  std::size_t spatial_features_size_{0};
+  std::vector<float> voxels_;
+  std::vector<int> coordinates_;
+  std::vector<float> num_points_per_voxel_;
+  cuda::unique_ptr<float[]> voxels_d_{nullptr};
+  cuda::unique_ptr<int[]> coordinates_d_{nullptr};
+  cuda::unique_ptr<float[]> num_points_per_voxel_d_{nullptr};
+  cuda::unique_ptr<float[]> encoder_in_features_d_{nullptr};
+  cuda::unique_ptr<float[]> pillar_features_d_{nullptr};
+  cuda::unique_ptr<float[]> spatial_features_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_heatmap_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_offset_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_z_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_dim_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_rot_d_{nullptr};
+  cuda::unique_ptr<float[]> head_out_vel_d_{nullptr};
 };
 
 }  // namespace centerpoint