From 767a93627da20a9f7bf8da4913c7c2a0ca59f8bd Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Thu, 24 Aug 2023 10:35:32 +0900
Subject: [PATCH 01/39] add segmentation model

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>

fix: add multitask for segment

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../tensorrt_common/tensorrt_common.hpp       |   4 +
 .../include/tensorrt_yolox/preprocess.hpp     |  16 +-
 .../include/tensorrt_yolox/tensorrt_yolox.hpp |  53 ++++++
 .../launch/yolox_s_plus_opt.launch.xml        |   2 +-
 perception/tensorrt_yolox/src/preprocess.cu   |  35 ++++
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     | 164 +++++++++++++++++-
 6 files changed, 271 insertions(+), 3 deletions(-)
diff --git a/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp
index aabaea7ca6339..6691c1fb9e97d 100644
--- a/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp
+++ b/common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp
@@ -15,7 +15,9 @@
 #ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
 #define TENSORRT_COMMON__TENSORRT_COMMON_HPP_
 
+#ifndef YOLOX_STANDALONE
 #include <rclcpp/rclcpp.hpp>
+#endif
 
 #include <NvInfer.h>
 #include <NvOnnxParser.h>
@@ -86,6 +88,7 @@ struct BuildConfig
     profile_per_layer(profile_per_layer),
     clip_value(clip_value)
   {
+#ifndef YOLOX_STANDALONE
     if (
       std::find(valid_calib_type.begin(), valid_calib_type.end(), calib_type_str) ==
       valid_calib_type.end()) {
@@ -95,6 +98,7 @@ struct BuildConfig
               << "Default calibration type will be used: MinMax" << std::endl;
       std::cerr << message.str();
     }
+#endif
   }
 };
 
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
index 3549ae35e70ea..c102432d204c9 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
@@ -179,6 +179,20 @@ extern void crop_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
 extern void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
   float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h,
   int s_c, int batch, float norm, cudaStream_t stream);
-}  // namespace tensorrt_yolox
 
+/**
+ * @brief Argmax on GPU
+ * @param[out] dst processed image
+ * @param[in] src probabilty map
+ * @param[in] d_w width for output
+ * @param[in] d_h height for output
+ * @param[in] s_w width for input
+ * @param[in] s_h height for input
+ * @param[in] s_c channel for input
+ * @param[in] batch batch size
+ * @param[in] stream cuda stream
+ */
+extern void argmax_gpu(
+  unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch, cudaStream_t stream);
+}  // namespace tensorrt_yolox
 #endif  // TENSORRT_YOLOX__PREPROCESS_HPP_
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index c42222a70c96b..6663ed6d43ed6 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -52,6 +52,13 @@ struct GridAndStride
   int stride;
 };
 
+typedef struct Colormap_
+{
+  int id;
+  std::string name;
+  std::vector<unsigned char> color;
+} Colormap;
+  
 /**
  * @class TrtYoloX
  * @brief TensorRT YOLOX for faster inference
@@ -130,6 +137,19 @@ class TrtYoloX
    */
   void printProfiling(void);
 
+  /**
+   * @brief get num for multitask heads
+   */  
+  int getMultitaskNum(void);
+
+  /**
+   * @brief get colorlized masks from index using specifief colormap
+   * @param[out] cmask colorlized mask
+   * @param[in] index multitask index
+   * @param[in] colormap colormap for masks
+   */  
+  cv::Mat getColorizedMask(int index, std::vector<Colormap> & colormap);  
+  
 private:
   /**
    * @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
@@ -206,6 +226,26 @@ class TrtYoloX
   void nmsSortedBboxes(
     const ObjectArray & face_objects, std::vector<int> & picked, float nms_threshold) const;
 
+  /**
+   * @brief get a mask image for a segmentation head
+   * @param[out] argmax argmax results
+   * @param[in] prob probmap
+   * @param[in] dims dimension for probmap
+   * @param[in] out_w mask width excluding letterbox
+   * @param[in] out_h mask height excluding letterbox
+   */  
+  cv::Mat getMaskImage(float *prob, nvinfer1::Dims dims, int out_w, int out_h);
+
+  /**
+   * @brief get a mask image on GPUs for a segmentation head
+   * @param[out] mask image
+   * @param[in] prob probablity map on device
+   * @param[in] out_w mask width excluding letterbox
+   * @param[in] out_h mask height excluding letterbox
+   * @param[in] b current batch
+   */  
+  cv::Mat getMaskImageGpu(float *d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b);      
+  
   std::unique_ptr<tensorrt_common::TrtCommon> trt_common_;
 
   std::vector<float> input_h_;
@@ -249,6 +289,19 @@ class TrtYoloX
   CudaUniquePtrHost<Roi[]> roi_h_;
   // device pointer for ROI
   CudaUniquePtr<Roi[]> roi_d_;
+
+  // flag whether model has multitasks
+  int multitask_;
+  // buff size for segmentation heads
+  CudaUniquePtr<float[]> segmentation_out_prob_d_;
+  CudaUniquePtrHost<float[]> segmentation_out_prob_h_;
+  size_t segmentation_out_elem_num_;
+  size_t segmentation_out_elem_num_per_batch_;
+  std::vector<cv::Mat> segmentation_masks_;
+  // host buffer for argmax postprocessing on GPU
+  CudaUniquePtrHost<unsigned char[]> argmax_buf_h_;
+  // device buffer for argmax postprocessing  on GPU
+  CudaUniquePtr<unsigned char[]> argmax_buf_d_;  
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index dd15eda2913ce..2a5219e4ac498 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -3,7 +3,7 @@
   <!-- cspell:ignore finetune -->
   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
-  <arg name="model_name" default="yolox-sPlus-T4-960x960-pseudo-finetune"/>
+  <arg name="model_name" default="yoloXP-sPlus-T4-960x960-pseudo-finetune-semseg"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
diff --git a/perception/tensorrt_yolox/src/preprocess.cu b/perception/tensorrt_yolox/src/preprocess.cu
index 3c3087c536f10..f384de2975aa4 100644
--- a/perception/tensorrt_yolox/src/preprocess.cu
+++ b/perception/tensorrt_yolox/src/preprocess.cu
@@ -594,4 +594,39 @@ void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
     cuda_gridsize(N), block, 0, stream>>>(N, dst, src, d_h, d_w, s_h, s_w, d_roi, norm, batch);
 }
 
+__global__ void argmax_gpu_kernel(
+  int N, unsigned char * dst, float * src, int dst_h, int dst_w, int src_c, int src_h, int src_w,
+  int batch)
+{
+  // NHWC
+  int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
+
+  if (index >= N) return;
+  int c = 0;
+  int w = index % dst_w;
+  int h = index / (dst_w);
+
+  int b;
+  for (b = 0; b < batch; b++) {
+    float max_prob = 0.0;
+    int max_index = 0;
+    int dst_index = w + dst_w * h + b * dst_h * dst_w;
+    for (c = 0; c < src_c; c++) {
+      int src_index = w + src_w * h + c * src_h * src_w + b * src_c * src_h * src_w;
+      max_index = max_prob < src[src_index] ? c : max_index;
+      max_prob = max_prob < src[src_index] ? src[src_index] : max_prob;
+    }
+    dst[dst_index] = max_index;
+  }
+}
+
+void argmax_gpu(
+  unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch,
+  cudaStream_t stream)
+{
+  int N = d_w * d_h;
+  argmax_gpu_kernel<<<cuda_gridsize(N), block, 0, stream>>>(
+    N, dst, src, d_h, d_w, s_c, s_h, s_w, batch);
+}
+
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index ea7857c3768aa..f157a58056353 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -112,6 +112,7 @@ TrtYoloX::TrtYoloX(
   src_height_ = -1;
   norm_factor_ = norm_factor;
   batch_size_ = batch_config[2];
+  multitask_ = 0;
   if (precision == "int8") {
     if (build_config.clip_value <= 0.0) {
       if (calibration_image_list_path.empty()) {
@@ -196,9 +197,18 @@ TrtYoloX::TrtYoloX(
       needs_output_decode_ = false;
       break;
     default:
+      needs_output_decode_ = true;
+      // The following three values are considered only if the specified model is plain one
+      num_class_ = num_class;
+      score_threshold_ = score_threshold;
+      nms_threshold_ = nms_threshold;
+      // Todo : Support multiple segmentation heads
+      multitask_++;
+      /*
       std::stringstream s;
       s << "\"" << model_path << "\" is unsupported format";
       throw std::runtime_error{s.str()};
+      */
   }
 
   // GPU memory allocation
@@ -234,10 +244,31 @@ TrtYoloX::TrtYoloX(
     out_scores_d_ = cuda_utils::make_unique<float[]>(batch_config[2] * max_detections_);
     out_classes_d_ = cuda_utils::make_unique<int32_t[]>(batch_config[2] * max_detections_);
   }
+  if (multitask_) {
+    // Allocate buffer for segmentations
+    segmentation_out_elem_num_ = 0;
+    for (int m = 0; m < multitask_; m++) {
+      const auto output_dims =
+        trt_common_->getBindingDimensions(m + 2);  // 0 : input, 1 : output for detections
+      size_t out_elem_num = std::accumulate(
+        output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
+      out_elem_num = out_elem_num * batch_config[2];
+      segmentation_out_elem_num_ += out_elem_num;
+    }
+    segmentation_out_elem_num_per_batch_ =
+      static_cast<int>(segmentation_out_elem_num_ / batch_config[2]);
+    segmentation_out_prob_d_ = cuda_utils::make_unique<float[]>(segmentation_out_elem_num_);
+    segmentation_out_prob_h_ =
+      cuda_utils::make_unique_host<float[]>(segmentation_out_elem_num_, cudaHostAllocPortable);
+  }
   if (use_gpu_preprocess) {
     use_gpu_preprocess_ = true;
     image_buf_h_ = nullptr;
     image_buf_d_ = nullptr;
+    if (multitask_) {
+      argmax_buf_h_ = nullptr;
+      argmax_buf_d_ = nullptr;
+    }
   } else {
     use_gpu_preprocess_ = false;
   }
@@ -252,6 +283,9 @@ TrtYoloX::~TrtYoloX()
     if (image_buf_d_) {
       image_buf_d_.reset();
     }
+    if (argmax_buf_d_) {
+      argmax_buf_d_.reset();
+    }
   }
 }
 
@@ -294,6 +328,25 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
         width * height * 3 * batch_size_, cudaHostAllocWriteCombined);
       image_buf_d_ = cuda_utils::make_unique<unsigned char[]>(width * height * 3 * batch_size_);
     }
+    if (multitask_) {
+      size_t argmax_out_elem_num_ = 0;
+      for (int m = 0; m < multitask_; m++) {
+        const auto output_dims =
+          trt_common_->getBindingDimensions(m + 2);  // 0 : input, 1 : output for detections
+        const float scale =
+          std::min(output_dims.d[3] / float(width), output_dims.d[2] / float(height));
+        int out_w = (int)(width * scale);
+        int out_h = (int)(height * scale);
+        //	size_t out_elem_num = std::accumulate(
+        // output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
+        //	out_elem_num = out_elem_num * batch_size_;
+        size_t out_elem_num = out_w * out_h * batch_size_;
+        argmax_out_elem_num_ += out_elem_num;
+      }
+      argmax_buf_h_ = cuda_utils::make_unique_host<unsigned char[]>(
+        argmax_out_elem_num_, cudaHostAllocWriteCombined);
+      argmax_buf_d_ = cuda_utils::make_unique<unsigned char[]>(argmax_out_elem_num_);
+    }
   }
 }
 
@@ -321,6 +374,12 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
         if (image_buf_d_) {
           image_buf_d_.reset();
         }
+        if (argmax_buf_h_) {
+          argmax_buf_h_.reset();
+        }
+        if (argmax_buf_d_) {
+          argmax_buf_d_.reset();
+        }
       }
     }
     src_width_ = width;
@@ -750,7 +809,9 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
 bool TrtYoloX::feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects)
 {
   std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
-
+  if (multitask_) {
+    buffers = {input_d_.get(), out_prob_d_.get(), segmentation_out_prob_d_.get()};
+  }
   trt_common_->enqueueV2(buffers.data(), *stream_, nullptr);
 
   const auto batch_size = images.size();
@@ -758,6 +819,11 @@ bool TrtYoloX::feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectA
   CHECK_CUDA_ERROR(cudaMemcpyAsync(
     out_prob_h_.get(), out_prob_d_.get(), sizeof(float) * out_elem_num_, cudaMemcpyDeviceToHost,
     *stream_));
+  if (multitask_) {
+    CHECK_CUDA_ERROR(cudaMemcpyAsync(
+      segmentation_out_prob_h_.get(), segmentation_out_prob_d_.get(),
+      sizeof(float) * segmentation_out_elem_num_, cudaMemcpyDeviceToHost, *stream_));
+  }
   cudaStreamSynchronize(*stream_);
   objects.clear();
 
@@ -767,6 +833,34 @@ bool TrtYoloX::feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectA
     ObjectArray object_array;
     decodeOutputs(batch_prob, object_array, scales_[i], image_size);
     objects.emplace_back(object_array);
+    if (multitask_) {
+      segmentation_masks_.clear();
+      float * segmentation_results =
+        segmentation_out_prob_h_.get() + (i * segmentation_out_elem_num_per_batch_);
+      size_t counter = 0;
+      int batch = (int)(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);
+      for (int m = 0; m < multitask_; m++) {
+        const auto output_dims =
+          trt_common_->getBindingDimensions(m + 2);  // 0 : input, 1 : output for detections
+        size_t out_elem_num = std::accumulate(
+          output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
+        out_elem_num = out_elem_num * batch;
+        const float scale = std::min(
+          output_dims.d[3] / float(image_size.width), output_dims.d[2] / float(image_size.height));
+        int out_w = (int)(image_size.width * scale);
+        int out_h = (int)(image_size.height * scale);
+        cv::Mat mask;
+        if (use_gpu_preprocess_) {
+          float * d_segmentation_results =
+            segmentation_out_prob_d_.get() + (i * segmentation_out_elem_num_per_batch_);
+          mask = getMaskImageGpu(&(d_segmentation_results[counter]), output_dims, out_w, out_h, i);
+        } else {
+          mask = getMaskImage(&(segmentation_results[counter]), output_dims, out_w, out_h);
+        }
+        segmentation_masks_.push_back(mask);
+        counter += out_elem_num;
+      }
+    }
   }
   return true;
 }
@@ -1036,4 +1130,72 @@ void TrtYoloX::nmsSortedBboxes(
   }
 }
 
+cv::Mat TrtYoloX::getMaskImageGpu(float * d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b)
+{
+  // NCHW
+  int classes = dims.d[1];
+  int height = dims.d[2];
+  int width = dims.d[3];
+  cv::Mat mask = cv::Mat::zeros(out_h, out_w, CV_8UC1);
+  int index = b * out_w * out_h;
+  argmax_gpu(
+    (unsigned char *)argmax_buf_d_.get() + index, d_prob, out_w, out_h, width, height, classes, 1,
+    *stream_);
+  CHECK_CUDA_ERROR(cudaMemcpyAsync(
+    argmax_buf_h_.get(), argmax_buf_d_.get(), sizeof(unsigned char) * 1 * out_w * out_h,
+    cudaMemcpyDeviceToHost, *stream_));
+  cudaStreamSynchronize(*stream_);
+  std::memcpy(mask.data, argmax_buf_h_.get() + index, sizeof(unsigned char) * 1 * out_w * out_h);
+  return mask;
+}
+
+cv::Mat TrtYoloX::getMaskImage(float * prob, nvinfer1::Dims dims, int out_w, int out_h)
+{
+  // NCHW
+  int classes = dims.d[1];
+  int height = dims.d[2];
+  int width = dims.d[3];
+  cv::Mat mask = cv::Mat::zeros(out_h, out_w, CV_8UC1);
+  // argmax
+  // #pragma omp parallel for
+  for (int y = 0; y < out_h; y++) {
+    for (int x = 0; x < out_w; x++) {
+      float max = 0.0;
+      int index = 0;
+      for (int c = 0; c < classes; c++) {
+        float value = prob[c * height * width + y * width + x];
+        if (max < value) {
+          max = value;
+          index = c;
+        }
+      }
+      mask.at<unsigned char>(y, x) = index;
+    }
+  }
+  return mask;
+}
+
+int TrtYoloX::getMultitaskNum(void)
+{
+  return multitask_;
+}
+
+cv::Mat TrtYoloX::getColorizedMask(int index, std::vector<Colormap> & colormap)
+{
+  cv::Mat mask;
+  mask = segmentation_masks_[index];
+  int width = mask.cols;
+  int height = mask.rows;
+  cv::Mat cmask = cv::Mat::zeros(height, width, CV_8UC3);
+  for (int y = 0; y < height; y++) {
+    for (int x = 0; x < width; x++) {
+      unsigned char id = mask.at<unsigned char>(y, x);
+      cmask.at<cv::Vec3b>(y, x)[0] = colormap[id].color[2];
+      cmask.at<cv::Vec3b>(y, x)[1] = colormap[id].color[1];
+      cmask.at<cv::Vec3b>(y, x)[2] = colormap[id].color[0];
+    }
+  }
+  return cmask;
+}
+
 }  // namespace tensorrt_yolox

From 83561965c9d050ce741514c74e85a02eb2c8e97a Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 25 Aug 2023 00:34:22 +0900
Subject: [PATCH 02/39] fix: publish mask

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../include/tensorrt_yolox/preprocess.hpp     |  3 ++-
 .../include/tensorrt_yolox/tensorrt_yolox.hpp | 27 ++++++++++---------
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    |  2 ++
 .../launch/yolox_s_plus_opt.launch.xml        | 27 ++++++++++++++++++-
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     | 12 ++++++---
 .../src/tensorrt_yolox_node.cpp               | 25 ++++++++++++++++-
 .../src/yolox_single_image_inference_node.cpp |  3 ++-
 7 files changed, 78 insertions(+), 21 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
index c102432d204c9..07ccdb941695c 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
@@ -193,6 +193,7 @@ extern void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
  * @param[in] stream cuda stream
  */
 extern void argmax_gpu(
-  unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch, cudaStream_t stream);
+  unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch,
+  cudaStream_t stream);
 }  // namespace tensorrt_yolox
 #endif  // TENSORRT_YOLOX__PREPROCESS_HPP_
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index 6663ed6d43ed6..c92bde859d2e8 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -58,7 +58,7 @@ typedef struct Colormap_
   std::string name;
   std::vector<unsigned char> color;
 } Colormap;
-  
+
 /**
  * @class TrtYoloX
  * @brief TensorRT YOLOX for faster inference
@@ -103,7 +103,7 @@ class TrtYoloX
    * @param[out] objects results for object detection
    * @param[in] images batched images
    */
-  bool doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects);
+  bool doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask);
 
   /**
    * @brief run inference including pre-process and post-process
@@ -139,7 +139,7 @@ class TrtYoloX
 
   /**
    * @brief get num for multitask heads
-   */  
+   */
   int getMultitaskNum(void);
 
   /**
@@ -147,9 +147,9 @@ class TrtYoloX
    * @param[out] cmask colorlized mask
    * @param[in] index multitask index
    * @param[in] colormap colormap for masks
-   */  
-  cv::Mat getColorizedMask(int index, std::vector<Colormap> & colormap);  
-  
+   */
+  cv::Mat getColorizedMask(int index, std::vector<Colormap> & colormap);
+
 private:
   /**
    * @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
@@ -197,7 +197,8 @@ class TrtYoloX
     const cv::Mat & images, int batch_size, ObjectArrays & objects);
 
   bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
-  bool feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects);
+  bool feedforwardAndDecode(
+    const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask);
   void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
   void generateGridsAndStride(
     const int target_w, const int target_h, const std::vector<int> & strides,
@@ -233,8 +234,8 @@ class TrtYoloX
    * @param[in] dims dimension for probmap
    * @param[in] out_w mask width excluding letterbox
    * @param[in] out_h mask height excluding letterbox
-   */  
-  cv::Mat getMaskImage(float *prob, nvinfer1::Dims dims, int out_w, int out_h);
+   */
+  cv::Mat getMaskImage(float * prob, nvinfer1::Dims dims, int out_w, int out_h);
 
   /**
    * @brief get a mask image on GPUs for a segmentation head
@@ -243,9 +244,9 @@ class TrtYoloX
    * @param[in] out_w mask width excluding letterbox
    * @param[in] out_h mask height excluding letterbox
    * @param[in] b current batch
-   */  
-  cv::Mat getMaskImageGpu(float *d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b);      
-  
+   */
+  cv::Mat getMaskImageGpu(float * d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b);
+
   std::unique_ptr<tensorrt_common::TrtCommon> trt_common_;
 
   std::vector<float> input_h_;
@@ -301,7 +302,7 @@ class TrtYoloX
   // host buffer for argmax postprocessing on GPU
   CudaUniquePtrHost<unsigned char[]> argmax_buf_h_;
   // device buffer for argmax postprocessing  on GPU
-  CudaUniquePtr<unsigned char[]> argmax_buf_d_;  
+  CudaUniquePtr<unsigned char[]> argmax_buf_d_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 9b440f1dbdfec..514b846d14a3a 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -53,6 +53,8 @@ class TrtYoloXNode : public rclcpp::Node
   void replaceLabelMap();
 
   image_transport::Publisher image_pub_;
+  image_transport::Publisher mask_pub_;
+
   rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;
 
   image_transport::Subscriber image_sub_;
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 2a5219e4ac498..1aa3e26dae5fe 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -3,9 +3,33 @@
   <!-- cspell:ignore finetune -->
   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
+  <arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
   <arg name="model_name" default="yoloXP-sPlus-T4-960x960-pseudo-finetune-semseg"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
-  <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
+  <arg name="model_path" default="$(var data_path)/tensorrt_yolox"/>
+  <arg name="score_threshold" default="0.35"/>
+  <arg name="nms_threshold" default="0.7"/>
+  <arg name="precision" default="int8" description="operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8]"/>
+  <arg
+    name="calibration_algorithm"
+    default="Entropy"
+    description="Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: [Entropy, (Legacy | Percentile), MinMax]"
+  />
+  <arg name="dla_core_id" default="-1" description="If positive ID value is specified, the node assign inference task to the DLA core"/>
+  <arg name="quantize_first_layer" default="false" description="If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8"/>
+  <arg name="quantize_last_layer" default="false" description="If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8"/>
+  <arg
+    name="profile_per_layer"
+    default="false"
+    description="If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose."
+  />
+  <arg
+    name="clip_value"
+    default="6.0"
+    description="If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration."
+  />
+  <arg name="preprocess_on_gpu" default="false" description="If true, pre-processing is performed on GPU"/>
+  <arg name="calibration_image_list_path" default="" description="Path to a file which contains path to images. Those images will be used for int8 quantization."/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
 
@@ -17,6 +41,7 @@
   <node pkg="tensorrt_yolox" exec="tensorrt_yolox_node_exe" name="tensorrt_yolox" output="screen">
     <remap from="~/in/image" to="$(var input/image)"/>
     <remap from="~/out/objects" to="$(var output/objects)"/>
+    <remap from="~/out/mask" to="$(var output/mask)"/>
     <param from="$(var yolox_param_path)" allow_substs="true"/>
     <param name="build_only" value="$(var build_only)"/>
   </node>
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index f157a58056353..3de0bc7d87c87 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -465,7 +465,8 @@ void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
   // No Need for Sync
 }
 
-bool TrtYoloX::doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+bool TrtYoloX::doInference(
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask)
 {
   if (!trt_common_->isInitialized()) {
     return false;
@@ -478,7 +479,7 @@ bool TrtYoloX::doInference(const std::vector<cv::Mat> & images, ObjectArrays & o
   }
 
   if (needs_output_decode_) {
-    return feedforwardAndDecode(images, objects);
+    return feedforwardAndDecode(images, objects, mask);
   } else {
     return feedforward(images, objects);
   }
@@ -718,6 +719,7 @@ void TrtYoloX::multiScalePreprocess(const cv::Mat & image, const std::vector<cv:
 bool TrtYoloX::doInferenceWithRoi(
   const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & rois)
 {
+  cv::Mat mask;
   if (!trt_common_->isInitialized()) {
     return false;
   }
@@ -728,7 +730,7 @@ bool TrtYoloX::doInferenceWithRoi(
   }
 
   if (needs_output_decode_) {
-    return feedforwardAndDecode(images, objects);
+    return feedforwardAndDecode(images, objects, mask);
   } else {
     return feedforward(images, objects);
   }
@@ -806,7 +808,8 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
   return true;
 }
 
-bool TrtYoloX::feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects)
+bool TrtYoloX::feedforwardAndDecode(
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask)
 {
   std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
   if (multitask_) {
@@ -860,6 +863,7 @@ bool TrtYoloX::feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectA
         segmentation_masks_.push_back(mask);
         counter += out_elem_num;
       }
+      mask = segmentation_masks_.at(0);
     }
   }
   return true;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index af435ce6efe83..f481f414e6324 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -107,6 +107,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
 
   objects_pub_ = this->create_publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>(
     "~/out/objects", 1);
+  mask_pub_ = image_transport::create_publisher(this, "~/out/mask");
   image_pub_ = image_transport::create_publisher(this, "~/out/image");
 
   if (declare_parameter("build_only", false)) {
@@ -145,7 +146,9 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
   const auto height = in_image_ptr->image.rows;
 
   tensorrt_yolox::ObjectArrays objects;
-  if (!trt_yolox_->doInference({in_image_ptr->image}, objects)) {
+  cv::Mat mask(cv::Size(height, width), CV_8UC1, cv::Scalar(0));
+
+  if (!trt_yolox_->doInference({in_image_ptr->image}, objects, mask)) {
     RCLCPP_WARN(this->get_logger(), "Fail to inference");
     return;
   }
@@ -169,6 +172,26 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
       in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3,
       8, 0);
   }
+
+  sensor_msgs::msg::Image::SharedPtr out_mask_msg =
+    cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::MONO8, mask)
+      .toImageMsg();
+  out_mask_msg->header = msg->header;
+  mask_pub_.publish(out_mask_msg);
+
+  // cv_bridge::CvImage cv_mat;
+  // cv_mat.encoding = sensor_msgs::image_encodings::MONO8;
+  // cv_mat.image = mask;
+  // sensor_msgs::msg::Image::SharedPtr out_mask_msg;
+  // out_mask_msg = cv_mat.toImageMsg();
+  // out_mask_msg->header = msg->header;
+  // mask_pub_.publish(out_mask_msg);
+
+  // sensor_msgs::msg::Image::SharedPtr out_mask_msg =
+  //     cv_bridge::CvImage(std_msgs::msg::Header(), "mono8", mask).toImageMsg();
+  // out_mask_msg->header = msg->header;
+  // mask_pub_.publish(out_mask_msg);
+
   image_pub_.publish(in_image_ptr->toImageMsg());
 
   out_objects.header = msg->header;
diff --git a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
index 0657f0096b07e..86e662a5e7b56 100644
--- a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
+++ b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
@@ -47,7 +47,8 @@ class YoloXSingleImageInferenceNode : public rclcpp::Node
     auto trt_yolox = std::make_unique<tensorrt_yolox::TrtYoloX>(model_path, precision);
     auto image = cv::imread(image_path);
     tensorrt_yolox::ObjectArrays objects;
-    trt_yolox->doInference({image}, objects);
+    cv::Mat mask;
+    trt_yolox->doInference({image}, objects, mask);
     for (const auto & object : objects[0]) {
       const auto left = object.x_offset;
       const auto top = object.y_offset;

From 297528ee2f4002577ce4425019028e87d9977d2c Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Sat, 26 Aug 2023 05:55:10 +0900
Subject: [PATCH 03/39] feat: publish colorized mask

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../include/tensorrt_yolox/tensorrt_yolox.hpp | 13 +++-
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    |  1 +
 .../launch/yolox_s_plus_opt.launch.xml        |  1 +
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     | 73 +++++++++++++++++--
 .../src/tensorrt_yolox_node.cpp               | 27 +++----
 .../src/yolox_single_image_inference_node.cpp |  8 +-
 6 files changed, 95 insertions(+), 28 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index c92bde859d2e8..50bcd3f414949 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -86,8 +86,9 @@ class TrtYoloX
    * @param[in] max_workspace_size maximum workspace for building TensorRT engine
    */
   TrtYoloX(
-    const std::string & model_path, const std::string & precision, const int num_class = 8,
-    const float score_threshold = 0.3, const float nms_threshold = 0.7,
+    const std::string & model_path, const std::string & precision,
+    const std::string & color_map_path, const int num_class = 8, const float score_threshold = 0.3,
+    const float nms_threshold = 0.7,
     const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
     const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
     const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
@@ -103,7 +104,9 @@ class TrtYoloX
    * @param[out] objects results for object detection
    * @param[in] images batched images
    */
-  bool doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask);
+  bool doInference(
+    const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
+    cv::Mat & color_mask);
 
   /**
    * @brief run inference including pre-process and post-process
@@ -198,7 +201,8 @@ class TrtYoloX
 
   bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
   bool feedforwardAndDecode(
-    const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask);
+    const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
+    cv::Mat & color_mask);
   void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
   void generateGridsAndStride(
     const int target_w, const int target_h, const std::vector<int> & strides,
@@ -303,6 +307,7 @@ class TrtYoloX
   CudaUniquePtrHost<unsigned char[]> argmax_buf_h_;
   // device buffer for argmax postprocessing  on GPU
   CudaUniquePtr<unsigned char[]> argmax_buf_d_;
+  std::vector<tensorrt_yolox::Colormap> color_map_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 514b846d14a3a..8582715816bdc 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -54,6 +54,7 @@ class TrtYoloXNode : public rclcpp::Node
 
   image_transport::Publisher image_pub_;
   image_transport::Publisher mask_pub_;
+  image_transport::Publisher color_mask_pub_;
 
   rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;
 
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 1aa3e26dae5fe..73248059ed1fd 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -44,5 +44,6 @@
     <remap from="~/out/mask" to="$(var output/mask)"/>
     <param from="$(var yolox_param_path)" allow_substs="true"/>
     <param name="build_only" value="$(var build_only)"/>
+    <param name="color_map_path" value="$(var model_path)/bdd100k_semseg.csv"/>
   </node>
 </launch>
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 3de0bc7d87c87..c7e38f15837b1 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -15,12 +15,18 @@
 #include "cuda_utils/cuda_check_error.hpp"
 #include "cuda_utils/cuda_unique_ptr.hpp"
 
+#include <experimental/filesystem>
 #include <tensorrt_yolox/calibrator.hpp>
 #include <tensorrt_yolox/preprocess.hpp>
 #include <tensorrt_yolox/tensorrt_yolox.hpp>
 
+#include <assert.h>
+
 #include <algorithm>
+#include <fstream>
 #include <functional>
+#include <iomanip>
+#include <iostream>
 #include <memory>
 #include <numeric>
 #include <stdexcept>
@@ -97,14 +103,62 @@ std::vector<std::string> loadImageList(const std::string & filename, const std::
   }
   return fileList;
 }
+
+std::vector<tensorrt_yolox::Colormap> get_seg_colormap(const std::string & filename)
+{
+  std::vector<tensorrt_yolox::Colormap> seg_cmap;
+  if (filename != "not-specified") {
+    std::vector<std::string> color_list = loadListFromTextFile(filename);
+    for (int i = 0; i < (int)color_list.size(); i++) {
+      if (i == 0) {
+        // Skip header
+        continue;
+      }
+      std::string colormapString = color_list[i];
+      tensorrt_yolox::Colormap cmap;
+      std::vector<int> rgb;
+      size_t npos = colormapString.find_first_of(',');
+      assert(npos != std::string::npos);
+      std::string substr = colormapString.substr(0, npos);
+      int id = (int)std::stoi(trim(substr));
+      colormapString.erase(0, npos + 1);
+
+      npos = colormapString.find_first_of(',');
+      assert(npos != std::string::npos);
+      substr = colormapString.substr(0, npos);
+      std::string name = (trim(substr));
+      cmap.id = id;
+      cmap.name = name;
+      colormapString.erase(0, npos + 1);
+      while (!colormapString.empty()) {
+        size_t npos = colormapString.find_first_of(',');
+        if (npos != std::string::npos) {
+          substr = colormapString.substr(0, npos);
+          unsigned char c = (unsigned char)std::stoi(trim(substr));
+          cmap.color.push_back(c);
+          colormapString.erase(0, npos + 1);
+        } else {
+          unsigned char c = (unsigned char)std::stoi(trim(colormapString));
+          cmap.color.push_back(c);
+          break;
+        }
+      }
+
+      seg_cmap.push_back(cmap);
+    }
+  }
+  return seg_cmap;
+}
+
 }  // anonymous namespace
 
 namespace tensorrt_yolox
 {
 TrtYoloX::TrtYoloX(
-  const std::string & model_path, const std::string & precision, const int num_class,
-  const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config,
-  const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor,
+  const std::string & model_path, const std::string & precision, const std::string & color_map_path,
+  const int num_class, const float score_threshold, const float nms_threshold,
+  tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess,
+  std::string calibration_image_list_path, const double norm_factor,
   [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
   const size_t max_workspace_size)
 {
@@ -113,6 +167,7 @@ TrtYoloX::TrtYoloX(
   norm_factor_ = norm_factor;
   batch_size_ = batch_config[2];
   multitask_ = 0;
+  color_map_ = get_seg_colormap(color_map_path);
   if (precision == "int8") {
     if (build_config.clip_value <= 0.0) {
       if (calibration_image_list_path.empty()) {
@@ -466,7 +521,8 @@ void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
 }
 
 bool TrtYoloX::doInference(
-  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask)
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
+  [[maybe_unused]] cv::Mat & color_mask)
 {
   if (!trt_common_->isInitialized()) {
     return false;
@@ -479,7 +535,7 @@ bool TrtYoloX::doInference(
   }
 
   if (needs_output_decode_) {
-    return feedforwardAndDecode(images, objects, mask);
+    return feedforwardAndDecode(images, objects, mask, color_mask);
   } else {
     return feedforward(images, objects);
   }
@@ -720,6 +776,7 @@ bool TrtYoloX::doInferenceWithRoi(
   const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & rois)
 {
   cv::Mat mask;
+  cv::Mat color_mask;
   if (!trt_common_->isInitialized()) {
     return false;
   }
@@ -730,7 +787,7 @@ bool TrtYoloX::doInferenceWithRoi(
   }
 
   if (needs_output_decode_) {
-    return feedforwardAndDecode(images, objects, mask);
+    return feedforwardAndDecode(images, objects, mask, color_mask);
   } else {
     return feedforward(images, objects);
   }
@@ -809,7 +866,8 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
 }
 
 bool TrtYoloX::feedforwardAndDecode(
-  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask)
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
+  [[maybe_unused]] cv::Mat & color_mask)
 {
   std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
   if (multitask_) {
@@ -864,6 +922,7 @@ bool TrtYoloX::feedforwardAndDecode(
         counter += out_elem_num;
       }
       mask = segmentation_masks_.at(0);
+      color_mask = getColorizedMask(i, color_map_);
     }
   }
   return true;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index f481f414e6324..5e61bf9952351 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -88,6 +88,8 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     ("Path to a file which contains path to images."
      "Those images will be used for int8 quantization."));
 
+  std::string color_map_path = declare_parameter_with_description(
+    "color_map_path", "", ("Path to a file which contains path to color map."));
   if (!readLabelFile(label_path)) {
     RCLCPP_ERROR(this->get_logger(), "Could not find label file");
     rclcpp::shutdown();
@@ -99,8 +101,8 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     profile_per_layer, clip_value);
 
   trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
-    model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config,
-    preprocess_on_gpu, calibration_image_list_path);
+    model_path, precision, color_map_path, label_map_.size(), score_threshold, nms_threshold,
+    build_config, preprocess_on_gpu, calibration_image_list_path);
 
   timer_ =
     rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this));
@@ -108,6 +110,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
   objects_pub_ = this->create_publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>(
     "~/out/objects", 1);
   mask_pub_ = image_transport::create_publisher(this, "~/out/mask");
+  color_mask_pub_ = image_transport::create_publisher(this, "~/out/color_mask");
   image_pub_ = image_transport::create_publisher(this, "~/out/image");
 
   if (declare_parameter("build_only", false)) {
@@ -147,8 +150,9 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
 
   tensorrt_yolox::ObjectArrays objects;
   cv::Mat mask(cv::Size(height, width), CV_8UC1, cv::Scalar(0));
+  cv::Mat color_mask(cv::Size(height, width), CV_8UC3, cv::Scalar(0, 0, 0));
 
-  if (!trt_yolox_->doInference({in_image_ptr->image}, objects, mask)) {
+  if (!trt_yolox_->doInference({in_image_ptr->image}, objects, mask, color_mask)) {
     RCLCPP_WARN(this->get_logger(), "Fail to inference");
     return;
   }
@@ -179,18 +183,11 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
   out_mask_msg->header = msg->header;
   mask_pub_.publish(out_mask_msg);
 
-  // cv_bridge::CvImage cv_mat;
-  // cv_mat.encoding = sensor_msgs::image_encodings::MONO8;
-  // cv_mat.image = mask;
-  // sensor_msgs::msg::Image::SharedPtr out_mask_msg;
-  // out_mask_msg = cv_mat.toImageMsg();
-  // out_mask_msg->header = msg->header;
-  // mask_pub_.publish(out_mask_msg);
-
-  // sensor_msgs::msg::Image::SharedPtr out_mask_msg =
-  //     cv_bridge::CvImage(std_msgs::msg::Header(), "mono8", mask).toImageMsg();
-  // out_mask_msg->header = msg->header;
-  // mask_pub_.publish(out_mask_msg);
+  sensor_msgs::msg::Image::SharedPtr output_color_mask_msg =
+    cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::BGR8, color_mask)
+      .toImageMsg();
+  output_color_mask_msg->header = msg->header;
+  color_mask_pub_.publish(output_color_mask_msg);
 
   image_pub_.publish(in_image_ptr->toImageMsg());
 
diff --git a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
index 86e662a5e7b56..5fd54a960b9de 100644
--- a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
+++ b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
@@ -43,12 +43,16 @@ class YoloXSingleImageInferenceNode : public rclcpp::Node
     p.replace_extension("");
     const auto output_image_path =
       declare_parameter("output_image_path", p.string() + "_detect" + ext);
+    std::string color_map_path = declare_parameter("color_map_path", "");
+    // seg_cmap_ = get_seg_colormap(color_map_path);
 
-    auto trt_yolox = std::make_unique<tensorrt_yolox::TrtYoloX>(model_path, precision);
+    auto trt_yolox =
+      std::make_unique<tensorrt_yolox::TrtYoloX>(model_path, precision, color_map_path);
     auto image = cv::imread(image_path);
     tensorrt_yolox::ObjectArrays objects;
     cv::Mat mask;
-    trt_yolox->doInference({image}, objects, mask);
+    cv::Mat color_mask;
+    trt_yolox->doInference({image}, objects, mask, color_mask);
     for (const auto & object : objects[0]) {
       const auto left = object.x_offset;
       const auto top = object.y_offset;

From 6dbbe77fe6681c3bd361993ee57476ef7bd23c0a Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Tue, 29 Aug 2023 10:39:04 +0900
Subject: [PATCH 04/39] fix: resize yolox mask

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp      | 6 +++---
 perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp | 7 ++++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index c7e38f15837b1..9ed710a6e9946 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -866,7 +866,7 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
 }
 
 bool TrtYoloX::feedforwardAndDecode(
-  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & out_mask,
   [[maybe_unused]] cv::Mat & color_mask)
 {
   std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
@@ -921,8 +921,8 @@ bool TrtYoloX::feedforwardAndDecode(
         segmentation_masks_.push_back(mask);
         counter += out_elem_num;
       }
-      mask = segmentation_masks_.at(0);
-      color_mask = getColorizedMask(i, color_map_);
+      out_mask = segmentation_masks_.at(0);
+      color_mask = getColorizedMask(0, color_map_);
     }
   }
   return true;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 5e61bf9952351..92969e7a7f2e7 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -176,13 +176,18 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
       in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3,
       8, 0);
   }
-
+  cv::resize(
+    mask, mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
+    cv::INTER_NEAREST);
   sensor_msgs::msg::Image::SharedPtr out_mask_msg =
     cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::MONO8, mask)
       .toImageMsg();
   out_mask_msg->header = msg->header;
   mask_pub_.publish(out_mask_msg);
 
+  cv::resize(
+    color_mask, color_mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
+    cv::INTER_NEAREST);
   sensor_msgs::msg::Image::SharedPtr output_color_mask_msg =
     cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::BGR8, color_mask)
       .toImageMsg();

From f64898003f90b720fa66448ace8ef5199deac946 Mon Sep 17 00:00:00 2001
From: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp>
Date: Fri, 6 Oct 2023 18:59:10 +0900
Subject: [PATCH 05/39] fix: add memory allocate operations

Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp>
---
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 9ed710a6e9946..f5745b1343f41 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -447,6 +447,7 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
   const float input_height = static_cast<float>(input_dims.d[2]);
   const float input_width = static_cast<float>(input_dims.d[3]);
   int b = 0;
+  size_t argmax_out_elem_num = 0;
   for (const auto & image : images) {
     if (!image_buf_h_) {
       const float scale = std::min(input_width / image.cols, input_height / image.rows);
@@ -462,7 +463,30 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
       image_buf_h_.get() + index, &image.data[0],
       image.cols * image.rows * 3 * sizeof(unsigned char));
     b++;
+
+    if (multitask_) {
+      for (int m = 0; m < multitask_; m++) {
+        const auto output_dims =
+            trt_common_->getBindingDimensions(m + 2);  // 0: input, 1: output for detections
+        const float scale =
+            std::min(output_dims.d[3] / float(image.cols), output_dims.d[2] / float(image.rows));
+        int out_w = static_cast<int>(image.cols * scale);
+        int out_h = static_cast<int>(image.rows * scale);
+        argmax_out_elem_num += out_w * out_h * batch_size;
+      }
+    }
   }
+
+  if (multitask_) {
+    if (!argmax_buf_h_) {
+      argmax_buf_h_ = cuda_utils::make_unique_host<unsigned char[]>(
+          argmax_out_elem_num, cudaHostAllocPortable);
+    }
+    if (!argmax_buf_d_) {
+      argmax_buf_d_ = cuda_utils::make_unique<unsigned char[]>(argmax_out_elem_num);
+    }
+  }
+
   // Copy into device memory
   CHECK_CUDA_ERROR(cudaMemcpyAsync(
     image_buf_d_.get(), image_buf_h_.get(),

From 3ab80e97c4933463c703098643264d087923cc13 Mon Sep 17 00:00:00 2001
From: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp>
Date: Fri, 6 Oct 2023 18:59:41 +0900
Subject: [PATCH 06/39] refactor: remove underscore for a local variable

Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp>
---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index f5745b1343f41..1d86fe434a059 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -384,7 +384,7 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
       image_buf_d_ = cuda_utils::make_unique<unsigned char[]>(width * height * 3 * batch_size_);
     }
     if (multitask_) {
-      size_t argmax_out_elem_num_ = 0;
+      size_t argmax_out_elem_num = 0;
       for (int m = 0; m < multitask_; m++) {
         const auto output_dims =
           trt_common_->getBindingDimensions(m + 2);  // 0 : input, 1 : output for detections
@@ -396,11 +396,11 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
         // output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
         //	out_elem_num = out_elem_num * batch_size_;
         size_t out_elem_num = out_w * out_h * batch_size_;
-        argmax_out_elem_num_ += out_elem_num;
+        argmax_out_elem_num += out_elem_num;
       }
       argmax_buf_h_ = cuda_utils::make_unique_host<unsigned char[]>(
-        argmax_out_elem_num_, cudaHostAllocWriteCombined);
-      argmax_buf_d_ = cuda_utils::make_unique<unsigned char[]>(argmax_out_elem_num_);
+          argmax_out_elem_num, cudaHostAllocPortable);
+      argmax_buf_d_ = cuda_utils::make_unique<unsigned char[]>(argmax_out_elem_num);
     }
   }
 }

From b1f901fb118d5ffbbe3e3512cffa2ec31599923b Mon Sep 17 00:00:00 2001
From: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp>
Date: Fri, 6 Oct 2023 19:00:28 +0900
Subject: [PATCH 07/39] chore: add condition to check the number of subscriber
 for newly added topics

Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp>
---
 perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 92969e7a7f2e7..3674cfd27d57d 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -125,7 +125,9 @@ void TrtYoloXNode::onConnect()
   if (
     objects_pub_->get_subscription_count() == 0 &&
     objects_pub_->get_intra_process_subscription_count() == 0 &&
-    image_pub_.getNumSubscribers() == 0) {
+    image_pub_.getNumSubscribers() == 0 &&
+    mask_pub_.getNumSubscribers() == 0 &&
+    color_mask_pub_.getNumSubscribers() == 0) {
     image_sub_.shutdown();
   } else if (!image_sub_) {
     image_sub_ = image_transport::create_subscription(

From 57f7daf4e7eca7bbc439077cc969ba0ebdc05da3 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 10 Nov 2023 21:24:05 +0900
Subject: [PATCH 08/39] chore: pre-commit

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../include/tensorrt_yolox/preprocess.hpp          |  2 +-
 .../include/tensorrt_yolox/tensorrt_yolox.hpp      | 10 +++++-----
 .../launch/yolox_s_plus_opt.launch.xml             |  4 ++--
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp   | 14 +++++++-------
 .../tensorrt_yolox/src/tensorrt_yolox_node.cpp     |  3 +--
 5 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
index 07ccdb941695c..faac6de4e3284 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp
@@ -183,7 +183,7 @@ extern void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
 /**
  * @brief Argmax on GPU
  * @param[out] dst processed image
- * @param[in] src probabilty map
+ * @param[in] src probability map
  * @param[in] d_w width for output
  * @param[in] d_h height for output
  * @param[in] s_w width for input
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index 50bcd3f414949..a0ab15b6db193 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -146,8 +146,8 @@ class TrtYoloX
   int getMultitaskNum(void);
 
   /**
-   * @brief get colorlized masks from index using specifief colormap
-   * @param[out] cmask colorlized mask
+   * @brief get colorized masks from index using specific colormap
+   * @param[out] cmask colorized mask
    * @param[in] index multitask index
    * @param[in] colormap colormap for masks
    */
@@ -234,8 +234,8 @@ class TrtYoloX
   /**
    * @brief get a mask image for a segmentation head
    * @param[out] argmax argmax results
-   * @param[in] prob probmap
-   * @param[in] dims dimension for probmap
+   * @param[in] prob probability map
+   * @param[in] dims dimension for probability map
    * @param[in] out_w mask width excluding letterbox
    * @param[in] out_h mask height excluding letterbox
    */
@@ -244,7 +244,7 @@ class TrtYoloX
   /**
    * @brief get a mask image on GPUs for a segmentation head
    * @param[out] mask image
-   * @param[in] prob probablity map on device
+   * @param[in] prob probability map on device
    * @param[in] out_w mask width excluding letterbox
    * @param[in] out_h mask height excluding letterbox
    * @param[in] b current batch
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 73248059ed1fd..2038d351fa908 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <launch>
-  <!-- cspell:ignore finetune -->
+  <!-- cspell: ignore semseg, finetune  -->
   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
   <arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
@@ -28,7 +28,7 @@
     default="6.0"
     description="If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration."
   />
-  <arg name="preprocess_on_gpu" default="false" description="If true, pre-processing is performed on GPU"/>
+  <arg name="preprocess_on_gpu" default="true" description="If true, pre-processing is performed on GPU"/>
   <arg name="calibration_image_list_path" default="" description="Path to a file which contains path to images. Those images will be used for int8 quantization."/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 1d86fe434a059..9b5a6aa75df72 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -300,7 +300,7 @@ TrtYoloX::TrtYoloX(
     out_classes_d_ = cuda_utils::make_unique<int32_t[]>(batch_config[2] * max_detections_);
   }
   if (multitask_) {
-    // Allocate buffer for segmentations
+    // Allocate buffer for segmentation
     segmentation_out_elem_num_ = 0;
     for (int m = 0; m < multitask_; m++) {
       const auto output_dims =
@@ -398,8 +398,8 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
         size_t out_elem_num = out_w * out_h * batch_size_;
         argmax_out_elem_num += out_elem_num;
       }
-      argmax_buf_h_ = cuda_utils::make_unique_host<unsigned char[]>(
-          argmax_out_elem_num, cudaHostAllocPortable);
+      argmax_buf_h_ =
+        cuda_utils::make_unique_host<unsigned char[]>(argmax_out_elem_num, cudaHostAllocPortable);
       argmax_buf_d_ = cuda_utils::make_unique<unsigned char[]>(argmax_out_elem_num);
     }
   }
@@ -467,9 +467,9 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
     if (multitask_) {
       for (int m = 0; m < multitask_; m++) {
         const auto output_dims =
-            trt_common_->getBindingDimensions(m + 2);  // 0: input, 1: output for detections
+          trt_common_->getBindingDimensions(m + 2);  // 0: input, 1: output for detections
         const float scale =
-            std::min(output_dims.d[3] / float(image.cols), output_dims.d[2] / float(image.rows));
+          std::min(output_dims.d[3] / float(image.cols), output_dims.d[2] / float(image.rows));
         int out_w = static_cast<int>(image.cols * scale);
         int out_h = static_cast<int>(image.rows * scale);
         argmax_out_elem_num += out_w * out_h * batch_size;
@@ -479,8 +479,8 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
 
   if (multitask_) {
     if (!argmax_buf_h_) {
-      argmax_buf_h_ = cuda_utils::make_unique_host<unsigned char[]>(
-          argmax_out_elem_num, cudaHostAllocPortable);
+      argmax_buf_h_ =
+        cuda_utils::make_unique_host<unsigned char[]>(argmax_out_elem_num, cudaHostAllocPortable);
     }
     if (!argmax_buf_d_) {
       argmax_buf_d_ = cuda_utils::make_unique<unsigned char[]>(argmax_out_elem_num);
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 3674cfd27d57d..f79adfa9f71ee 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -125,8 +125,7 @@ void TrtYoloXNode::onConnect()
   if (
     objects_pub_->get_subscription_count() == 0 &&
     objects_pub_->get_intra_process_subscription_count() == 0 &&
-    image_pub_.getNumSubscribers() == 0 &&
-    mask_pub_.getNumSubscribers() == 0 &&
+    image_pub_.getNumSubscribers() == 0 && mask_pub_.getNumSubscribers() == 0 &&
     color_mask_pub_.getNumSubscribers() == 0) {
     image_sub_.shutdown();
   } else if (!image_sub_) {

From b377d4bfb43b4725c21c72321441ac62a0d8b196 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 20 Nov 2023 11:01:01 +0900
Subject: [PATCH 09/39] fix: add roi overlapping segment

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../include/tensorrt_yolox/tensorrt_yolox.hpp | 20 +++--
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    |  6 +-
 .../launch/yolox_s_plus_opt.launch.xml        |  9 ++-
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     | 68 ++++++++++-------
 .../src/tensorrt_yolox_node.cpp               | 76 ++++++++++++++-----
 .../src/yolox_single_image_inference_node.cpp |  6 +-
 6 files changed, 129 insertions(+), 56 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index a0ab15b6db193..4f0f147eefed4 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -78,6 +78,7 @@ class TrtYoloX
    * @param[in] build_config configuration including precision, calibration method, DLA, remaining
    * fp16 for first layer,  remaining fp16 for last layer and profiler for builder
    * @param[in] use_gpu_preprocess whether use cuda gpu for preprocessing
+   * @param[in] publish_color_mask whether publish color_mask for debugging and visualization
    * @param[in] calibration_image_list_file path for calibration files (only require for
    * quantization)
    * @param[in] norm_factor scaling factor for preprocess
@@ -90,8 +91,9 @@ class TrtYoloX
     const std::string & color_map_path, const int num_class = 8, const float score_threshold = 0.3,
     const float nms_threshold = 0.7,
     const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
-    const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
-    const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
+    const bool use_gpu_preprocess = false, const bool publish_color_mask = false,
+    std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0,
+    [[maybe_unused]] const std::string & cache_dir = "",
     const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
     const size_t max_workspace_size = (1 << 30));
   /**
@@ -105,8 +107,8 @@ class TrtYoloX
    * @param[in] images batched images
    */
   bool doInference(
-    const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
-    cv::Mat & color_mask);
+    const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
+    std::vector<cv::Mat> & color_masks);
 
   /**
    * @brief run inference including pre-process and post-process
@@ -201,8 +203,8 @@ class TrtYoloX
 
   bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
   bool feedforwardAndDecode(
-    const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
-    cv::Mat & color_mask);
+    const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
+    std::vector<cv::Mat> & color_masks);
   void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
   void generateGridsAndStride(
     const int target_w, const int target_h, const std::vector<int> & strides,
@@ -307,7 +309,11 @@ class TrtYoloX
   CudaUniquePtrHost<unsigned char[]> argmax_buf_h_;
   // device buffer for argmax postprocessing  on GPU
   CudaUniquePtr<unsigned char[]> argmax_buf_d_;
-  std::vector<tensorrt_yolox::Colormap> color_map_;
+  std::vector<tensorrt_yolox::Colormap> sematic_color_map_;
+  // flag whether overlay segmentation by roi
+  bool roi_overlap_segment_;
+  // flag where publish color mask for debugging and visualization
+  bool publish_color_mask_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 8582715816bdc..9bc88d89cacf6 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -51,7 +51,8 @@ class TrtYoloXNode : public rclcpp::Node
   void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
   bool readLabelFile(const std::string & label_path);
   void replaceLabelMap();
-
+  void overlapSegmentByRoi(const tensorrt_yolox::Object & object, cv::Mat & mask);
+  int mapRoiLabel2SegLabel(const int32_t roi_label_index);
   image_transport::Publisher image_pub_;
   image_transport::Publisher mask_pub_;
   image_transport::Publisher color_mask_pub_;
@@ -64,6 +65,9 @@ class TrtYoloXNode : public rclcpp::Node
 
   LabelMap label_map_;
   std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
+  bool is_roi_overlap_segment_;
+  bool is_publish_color_mask_;
+  float overlap_roi_score_threshold_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 2038d351fa908..5cc2896d6522e 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -32,7 +32,9 @@
   <arg name="calibration_image_list_path" default="" description="Path to a file which contains path to images. Those images will be used for int8 quantization."/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
-
+  <arg name="is_roi_overlap_segment" default="true" description="refine segmentation mask by overlay roi class, disable when sematic segmentation accuracy is good enough"/>
+  <arg name="is_publish_color_mask" default="false" description="publish color mask for result visualization"/>
+  <arg name="overlap_roi_score_threshold" default="0.3" description="the roi object existance probability threshold that consider to replace segmentation"/>
   <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
     <remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
     <remap from="~/output/raw_image" to="$(var input/image)"/>
@@ -44,6 +46,9 @@
     <remap from="~/out/mask" to="$(var output/mask)"/>
     <param from="$(var yolox_param_path)" allow_substs="true"/>
     <param name="build_only" value="$(var build_only)"/>
-    <param name="color_map_path" value="$(var model_path)/bdd100k_semseg.csv"/>
+    <param name="color_map_path" value="$(var model_path)/semseg_color_map.csv"/>
+    <param name="is_roi_overlap_segment" value="$(var is_roi_overlap_segment)"/>
+    <param name="is_publish_color_mask" value="$(var is_publish_color_mask)"/>
+    <param name="overlap_roi_score_threshold" value="$(var overlap_roi_score_threshold)"/>
   </node>
 </launch>
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 9b5a6aa75df72..77d71c65e3c01 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -109,7 +109,7 @@ std::vector<tensorrt_yolox::Colormap> get_seg_colormap(const std::string & filen
   std::vector<tensorrt_yolox::Colormap> seg_cmap;
   if (filename != "not-specified") {
     std::vector<std::string> color_list = loadListFromTextFile(filename);
-    for (int i = 0; i < (int)color_list.size(); i++) {
+    for (int i = 0; i < static_cast<int>(color_list.size()); i++) {
       if (i == 0) {
         // Skip header
         continue;
@@ -120,7 +120,7 @@ std::vector<tensorrt_yolox::Colormap> get_seg_colormap(const std::string & filen
       size_t npos = colormapString.find_first_of(',');
       assert(npos != std::string::npos);
       std::string substr = colormapString.substr(0, npos);
-      int id = (int)std::stoi(trim(substr));
+      int id = static_cast<int>(std::stoi(trim(substr)));
       colormapString.erase(0, npos + 1);
 
       npos = colormapString.find_first_of(',');
@@ -157,7 +157,7 @@ namespace tensorrt_yolox
 TrtYoloX::TrtYoloX(
   const std::string & model_path, const std::string & precision, const std::string & color_map_path,
   const int num_class, const float score_threshold, const float nms_threshold,
-  tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess,
+  tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess, bool publish_color_mask,
   std::string calibration_image_list_path, const double norm_factor,
   [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
   const size_t max_workspace_size)
@@ -167,7 +167,8 @@ TrtYoloX::TrtYoloX(
   norm_factor_ = norm_factor;
   batch_size_ = batch_config[2];
   multitask_ = 0;
-  color_map_ = get_seg_colormap(color_map_path);
+  sematic_color_map_ = get_seg_colormap(color_map_path);
+  publish_color_mask_ = publish_color_mask;
   if (precision == "int8") {
     if (build_config.clip_value <= 0.0) {
       if (calibration_image_list_path.empty()) {
@@ -388,13 +389,14 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
       for (int m = 0; m < multitask_; m++) {
         const auto output_dims =
           trt_common_->getBindingDimensions(m + 2);  // 0 : input, 1 : output for detections
-        const float scale =
-          std::min(output_dims.d[3] / float(width), output_dims.d[2] / float(height));
-        int out_w = (int)(width * scale);
-        int out_h = (int)(height * scale);
-        //	size_t out_elem_num = std::accumulate(
+        const float scale = std::min(
+          output_dims.d[3] / static_cast<float>(width),
+          output_dims.d[2] / static_cast<float>(height));
+        int out_w = static_cast<int>(width * scale);
+        int out_h = static_cast<int>(height * scale);
+        // size_t out_elem_num = std::accumulate(
         // output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
-        //	out_elem_num = out_elem_num * batch_size_;
+        // out_elem_num = out_elem_num * batch_size_;
         size_t out_elem_num = out_w * out_h * batch_size_;
         argmax_out_elem_num += out_elem_num;
       }
@@ -468,8 +470,9 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
       for (int m = 0; m < multitask_; m++) {
         const auto output_dims =
           trt_common_->getBindingDimensions(m + 2);  // 0: input, 1: output for detections
-        const float scale =
-          std::min(output_dims.d[3] / float(image.cols), output_dims.d[2] / float(image.rows));
+        const float scale = std::min(
+          output_dims.d[3] / static_cast<float>(image.cols),
+          output_dims.d[2] / static_cast<float>(image.rows));
         int out_w = static_cast<int>(image.cols * scale);
         int out_h = static_cast<int>(image.rows * scale);
         argmax_out_elem_num += out_w * out_h * batch_size;
@@ -545,8 +548,8 @@ void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
 }
 
 bool TrtYoloX::doInference(
-  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask,
-  [[maybe_unused]] cv::Mat & color_mask)
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
+  [[maybe_unused]] std::vector<cv::Mat> & color_masks)
 {
   if (!trt_common_->isInitialized()) {
     return false;
@@ -559,7 +562,7 @@ bool TrtYoloX::doInference(
   }
 
   if (needs_output_decode_) {
-    return feedforwardAndDecode(images, objects, mask, color_mask);
+    return feedforwardAndDecode(images, objects, masks, color_masks);
   } else {
     return feedforward(images, objects);
   }
@@ -799,8 +802,8 @@ void TrtYoloX::multiScalePreprocess(const cv::Mat & image, const std::vector<cv:
 bool TrtYoloX::doInferenceWithRoi(
   const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect> & rois)
 {
-  cv::Mat mask;
-  cv::Mat color_mask;
+  std::vector<cv::Mat> masks;
+  std::vector<cv::Mat> color_masks;
   if (!trt_common_->isInitialized()) {
     return false;
   }
@@ -811,7 +814,7 @@ bool TrtYoloX::doInferenceWithRoi(
   }
 
   if (needs_output_decode_) {
-    return feedforwardAndDecode(images, objects, mask, color_mask);
+    return feedforwardAndDecode(images, objects, masks, color_masks);
   } else {
     return feedforward(images, objects);
   }
@@ -890,8 +893,8 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
 }
 
 bool TrtYoloX::feedforwardAndDecode(
-  const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & out_mask,
-  [[maybe_unused]] cv::Mat & color_mask)
+  const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & out_masks,
+  [[maybe_unused]] std::vector<cv::Mat> & color_masks)
 {
   std::vector<void *> buffers = {input_d_.get(), out_prob_d_.get()};
   if (multitask_) {
@@ -914,16 +917,20 @@ bool TrtYoloX::feedforwardAndDecode(
 
   for (size_t i = 0; i < batch_size; ++i) {
     auto image_size = images[i].size();
+    auto & out_mask = out_masks[i];
+    auto & color_mask = color_masks[i];
     float * batch_prob = out_prob_h_.get() + (i * out_elem_num_per_batch_);
     ObjectArray object_array;
     decodeOutputs(batch_prob, object_array, scales_[i], image_size);
+    // add refine mask using object
     objects.emplace_back(object_array);
     if (multitask_) {
       segmentation_masks_.clear();
       float * segmentation_results =
         segmentation_out_prob_h_.get() + (i * segmentation_out_elem_num_per_batch_);
       size_t counter = 0;
-      int batch = (int)(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);
+      int batch =
+        static_cast<int>(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);
       for (int m = 0; m < multitask_; m++) {
         const auto output_dims =
           trt_common_->getBindingDimensions(m + 2);  // 0 : input, 1 : output for detections
@@ -931,9 +938,10 @@ bool TrtYoloX::feedforwardAndDecode(
           output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
         out_elem_num = out_elem_num * batch;
         const float scale = std::min(
-          output_dims.d[3] / float(image_size.width), output_dims.d[2] / float(image_size.height));
-        int out_w = (int)(image_size.width * scale);
-        int out_h = (int)(image_size.height * scale);
+          output_dims.d[3] / static_cast<float>(image_size.width),
+          output_dims.d[2] / static_cast<float>(image_size.height));
+        int out_w = static_cast<int>(image_size.width * scale);
+        int out_h = static_cast<int>(image_size.height * scale);
         cv::Mat mask;
         if (use_gpu_preprocess_) {
           float * d_segmentation_results =
@@ -945,8 +953,16 @@ bool TrtYoloX::feedforwardAndDecode(
         segmentation_masks_.push_back(mask);
         counter += out_elem_num;
       }
-      out_mask = segmentation_masks_.at(0);
-      color_mask = getColorizedMask(0, color_map_);
+    } else {
+      continue;
+    }
+    // Assume semantic segmentation is first task
+    // This should remove when the segmentation accuracy is high
+    out_mask = segmentation_masks_.at(0);
+
+    // publish color mask for visualization
+    if (publish_color_mask_) {
+      color_mask = getColorizedMask(0, sematic_color_map_);
     }
   }
   return true;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index f79adfa9f71ee..38b172b0334ca 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -94,6 +94,10 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     RCLCPP_ERROR(this->get_logger(), "Could not find label file");
     rclcpp::shutdown();
   }
+
+  is_roi_overlap_segment_ = declare_parameter<bool>("is_roi_overlap_segment");
+  is_publish_color_mask_ = declare_parameter<bool>("is_publish_color_mask");
+  overlap_roi_score_threshold_ = declare_parameter<float>("overlap_roi_score_threshold");
   replaceLabelMap();
 
   tensorrt_common::BuildConfig build_config(
@@ -102,7 +106,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
 
   trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
     model_path, precision, color_map_path, label_map_.size(), score_threshold, nms_threshold,
-    build_config, preprocess_on_gpu, calibration_image_list_path);
+    build_config, preprocess_on_gpu, is_publish_color_mask_, calibration_image_list_path);
 
   timer_ =
     rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this));
@@ -150,13 +154,19 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
   const auto height = in_image_ptr->image.rows;
 
   tensorrt_yolox::ObjectArrays objects;
-  cv::Mat mask(cv::Size(height, width), CV_8UC1, cv::Scalar(0));
-  cv::Mat color_mask(cv::Size(height, width), CV_8UC3, cv::Scalar(0, 0, 0));
+  std::vector<cv::Mat> masks = {cv::Mat(cv::Size(height, width), CV_8UC1, cv::Scalar(0))};
+  std::vector<cv::Mat> color_masks = {
+    cv::Mat(cv::Size(height, width), CV_8UC3, cv::Scalar(0, 0, 0))};
 
-  if (!trt_yolox_->doInference({in_image_ptr->image}, objects, mask, color_mask)) {
+  if (!trt_yolox_->doInference({in_image_ptr->image}, objects, masks, color_masks)) {
     RCLCPP_WARN(this->get_logger(), "Fail to inference");
     return;
   }
+  auto & mask = masks.at(0);
+  cv::resize(
+    mask, mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
+    cv::INTER_NEAREST);
+
   for (const auto & yolox_object : objects.at(0)) {
     tier4_perception_msgs::msg::DetectedObjectWithFeature object;
     object.feature.roi.x_offset = yolox_object.x_offset;
@@ -176,29 +186,32 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
     cv::rectangle(
       in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3,
       8, 0);
+    // Refine mask: replacing segmentation mask by roi class
+    if (is_roi_overlap_segment_) {
+      overlapSegmentByRoi(yolox_object, mask);
+    }
   }
-  cv::resize(
-    mask, mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
-    cv::INTER_NEAREST);
   sensor_msgs::msg::Image::SharedPtr out_mask_msg =
     cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::MONO8, mask)
       .toImageMsg();
   out_mask_msg->header = msg->header;
   mask_pub_.publish(out_mask_msg);
 
-  cv::resize(
-    color_mask, color_mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
-    cv::INTER_NEAREST);
-  sensor_msgs::msg::Image::SharedPtr output_color_mask_msg =
-    cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::BGR8, color_mask)
-      .toImageMsg();
-  output_color_mask_msg->header = msg->header;
-  color_mask_pub_.publish(output_color_mask_msg);
-
   image_pub_.publish(in_image_ptr->toImageMsg());
-
   out_objects.header = msg->header;
   objects_pub_->publish(out_objects);
+
+  if (is_publish_color_mask_) {
+    auto & color_mask = color_masks.at(0);
+    cv::resize(
+      color_mask, color_mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
+      cv::INTER_NEAREST);
+    sensor_msgs::msg::Image::SharedPtr output_color_mask_msg =
+      cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::BGR8, color_mask)
+        .toImageMsg();
+    output_color_mask_msg->header = msg->header;
+    color_mask_pub_.publish(output_color_mask_msg);
+  }
 }
 
 bool TrtYoloXNode::readLabelFile(const std::string & label_path)
@@ -235,6 +248,35 @@ void TrtYoloXNode::replaceLabelMap()
   }
 }
 
+int TrtYoloXNode::mapRoiLabel2SegLabel(const int32_t roi_label_index)
+{
+  auto & roi_label = label_map_[roi_label_index];
+  if (roi_label == "CAR" || roi_label == "BUS" || roi_label == "TRUCK") {
+    return static_cast<int>(roi_label_index + 11);
+  }
+  if (roi_label == "PEDESTRIAN") {
+    return 11;  // person index in segment_color_map
+  }
+  if (roi_label == "MOTORCYCLE") {
+    return 17;  // motocycle index in segment_color_map
+  }
+  if (roi_label == "BICYCLE") {
+    return 18;  // bicycle index in segment_color_map
+  }
+  return -1;
+}
+
+void TrtYoloXNode::overlapSegmentByRoi(const tensorrt_yolox::Object & roi_object, cv::Mat & mask)
+{
+  if (roi_object.score < overlap_roi_score_threshold_) return;
+  cv::Mat submat = mask.colRange(roi_object.x_offset, roi_object.width)
+                     .rowRange(roi_object.y_offset, roi_object.height);
+  int seg_class_index = mapRoiLabel2SegLabel(roi_object.type);
+  if (seg_class_index < 0) return;
+  cv::Mat replace_roi(cv::Size(), mask.type(), seg_class_index);
+  replace_roi.copyTo(submat);
+}
+
 }  // namespace tensorrt_yolox
 
 #include "rclcpp_components/register_node_macro.hpp"
diff --git a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
index 5fd54a960b9de..46170d85ea1cc 100644
--- a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
+++ b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
@@ -50,9 +50,9 @@ class YoloXSingleImageInferenceNode : public rclcpp::Node
       std::make_unique<tensorrt_yolox::TrtYoloX>(model_path, precision, color_map_path);
     auto image = cv::imread(image_path);
     tensorrt_yolox::ObjectArrays objects;
-    cv::Mat mask;
-    cv::Mat color_mask;
-    trt_yolox->doInference({image}, objects, mask, color_mask);
+    std::vector<cv::Mat> masks;
+    std::vector<cv::Mat> color_masks;
+    trt_yolox->doInference({image}, objects, masks, color_masks);
     for (const auto & object : objects[0]) {
       const auto left = object.x_offset;
       const auto top = object.y_offset;

From f3d1bf6bd6373362d70a81eaca470ef9f799cd24 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 20 Nov 2023 17:47:47 +0900
Subject: [PATCH 10/39] fix: roi overlay segment

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../include/tensorrt_yolox/tensorrt_yolox.hpp |  5 +-
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     | 15 ++----
 .../src/tensorrt_yolox_node.cpp               | 46 +++++++++++--------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index 4f0f147eefed4..a0b8cd006cc32 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -153,7 +153,10 @@ class TrtYoloX
    * @param[in] index multitask index
    * @param[in] colormap colormap for masks
    */
-  cv::Mat getColorizedMask(int index, std::vector<Colormap> & colormap);
+    void getColorizedMask(
+    const std::vector<tensorrt_yolox::Colormap> & colormap, const cv::Mat & mask,
+    cv::Mat & colorized_mask);
+  inline std::vector<Colormap> getColorMap() { return sematic_color_map_; }
 
 private:
   /**
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 77d71c65e3c01..46cdf4cb2c7bf 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -918,7 +918,6 @@ bool TrtYoloX::feedforwardAndDecode(
   for (size_t i = 0; i < batch_size; ++i) {
     auto image_size = images[i].size();
     auto & out_mask = out_masks[i];
-    auto & color_mask = color_masks[i];
     float * batch_prob = out_prob_h_.get() + (i * out_elem_num_per_batch_);
     ObjectArray object_array;
     decodeOutputs(batch_prob, object_array, scales_[i], image_size);
@@ -957,13 +956,7 @@ bool TrtYoloX::feedforwardAndDecode(
       continue;
     }
     // Assume semantic segmentation is first task
-    // This should remove when the segmentation accuracy is high
     out_mask = segmentation_masks_.at(0);
-
-    // publish color mask for visualization
-    if (publish_color_mask_) {
-      color_mask = getColorizedMask(0, sematic_color_map_);
-    }
   }
   return true;
 }
@@ -1283,13 +1276,12 @@ int TrtYoloX::getMultitaskNum(void)
   return multitask_;
 }
 
-cv::Mat TrtYoloX::getColorizedMask(int index, std::vector<Colormap> & colormap)
+void TrtYoloX::getColorizedMask(
+  const std::vector<tensorrt_yolox::Colormap> & colormap, const cv::Mat & mask, cv::Mat & cmask)
 {
-  cv::Mat mask;
-  mask = segmentation_masks_[index];
   int width = mask.cols;
   int height = mask.rows;
-  cv::Mat cmask = cv::Mat::zeros(height, width, CV_8UC3);
+  // TODO: check size of mask and cmask
   for (int y = 0; y < height; y++) {
     for (int x = 0; x < width; x++) {
       unsigned char id = mask.at<unsigned char>(y, x);
@@ -1298,7 +1290,6 @@ cv::Mat TrtYoloX::getColorizedMask(int index, std::vector<Colormap> & colormap)
       cmask.at<cv::Vec3b>(y, x)[2] = colormap[id].color[0];
     }
   }
-  return cmask;
 }
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 38b172b0334ca..526d458d0fcf8 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -187,6 +187,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
       in_image_ptr->image, cv::Point(left, top), cv::Point(right, bottom), cv::Scalar(0, 0, 255), 3,
       8, 0);
     // Refine mask: replacing segmentation mask by roi class
+    // This should remove when the segmentation accuracy is high
     if (is_roi_overlap_segment_) {
       overlapSegmentByRoi(yolox_object, mask);
     }
@@ -202,10 +203,9 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
   objects_pub_->publish(out_objects);
 
   if (is_publish_color_mask_) {
-    auto & color_mask = color_masks.at(0);
-    cv::resize(
-      color_mask, color_mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
-      cv::INTER_NEAREST);
+    cv::Mat color_mask =
+      cv::Mat::zeros(in_image_ptr->image.rows, in_image_ptr->image.cols, CV_8UC3);
+    trt_yolox_->getColorizedMask(trt_yolox_->getColorMap(), mask, color_mask);
     sensor_msgs::msg::Image::SharedPtr output_color_mask_msg =
       cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::BGR8, color_mask)
         .toImageMsg();
@@ -250,18 +250,23 @@ void TrtYoloXNode::replaceLabelMap()
 
 int TrtYoloXNode::mapRoiLabel2SegLabel(const int32_t roi_label_index)
 {
-  auto & roi_label = label_map_[roi_label_index];
-  if (roi_label == "CAR" || roi_label == "BUS" || roi_label == "TRUCK") {
-    return static_cast<int>(roi_label_index + 11);
-  }
-  if (roi_label == "PEDESTRIAN") {
-    return 11;  // person index in segment_color_map
-  }
-  if (roi_label == "MOTORCYCLE") {
-    return 17;  // motocycle index in segment_color_map
-  }
-  if (roi_label == "BICYCLE") {
-    return 18;  // bicycle index in segment_color_map
+  switch (roi_label_index) {
+    case 0:
+      return 5;
+    case 1:
+      return 13;
+    case 2:
+      return 14;
+    case 3:
+      return 15;
+    case 4:
+      return 18;
+    case 5:
+      return 17;
+    case 6:
+      return 11;
+    default:
+      return -1;
   }
   return -1;
 }
@@ -269,12 +274,13 @@ int TrtYoloXNode::mapRoiLabel2SegLabel(const int32_t roi_label_index)
 void TrtYoloXNode::overlapSegmentByRoi(const tensorrt_yolox::Object & roi_object, cv::Mat & mask)
 {
   if (roi_object.score < overlap_roi_score_threshold_) return;
-  cv::Mat submat = mask.colRange(roi_object.x_offset, roi_object.width)
-                     .rowRange(roi_object.y_offset, roi_object.height);
   int seg_class_index = mapRoiLabel2SegLabel(roi_object.type);
   if (seg_class_index < 0) return;
-  cv::Mat replace_roi(cv::Size(), mask.type(), seg_class_index);
-  replace_roi.copyTo(submat);
+  cv::Mat replace_roi(
+    cv::Size(roi_object.width, roi_object.height), mask.type(),
+    static_cast<uint8_t>(seg_class_index));
+  replace_roi.copyTo(mask.colRange(roi_object.x_offset, roi_object.x_offset + roi_object.width)
+                       .rowRange(roi_object.y_offset, roi_object.y_offset + roi_object.height));
 }
 
 }  // namespace tensorrt_yolox

From 295fae6d1e016e79c04ab84aa353167d56b0fc82 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 20 Nov 2023 21:27:46 +0900
Subject: [PATCH 11/39] chore: refactor

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../config/yolox_s_plus_opt.param.yaml        | 20 +++++++++++
 .../include/tensorrt_yolox/tensorrt_yolox.hpp | 12 ++-----
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    | 15 ++++++++
 .../launch/yolox_s_plus_opt.launch.xml        |  8 ++---
 perception/tensorrt_yolox/package.xml         |  1 +
 .../tensorrt_yolox/src/tensorrt_yolox.cpp     |  8 +++--
 .../src/tensorrt_yolox_node.cpp               | 36 +++++++++----------
 7 files changed, 64 insertions(+), 36 deletions(-)

diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index bc67173442094..d94f4ca004ee3 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -1,3 +1,23 @@
+/**:
+  ros__parameters:
+    # refine segmentation mask by overlay roi class
+    # disable when sematic segmentation accuracy is good enough
+    is_roi_overlap_segment: true
+
+    # minimum existence_probability of detected roi considered to replace segmentation
+    overlap_roi_score_threshold: 0.3
+
+    # publish color mask for result visualization
+    is_publish_color_mask: false
+
+    roi_overlay_segment_label:
+      UNKNOWN : true
+      CAR : true
+      TRUCK : true
+      BUS : true
+      MOTORCYCLE : true
+      BICYCLE : true
+      PEDESTRIAN : true
 /**:
   ros__parameters:
     model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index a0b8cd006cc32..f5c132761a85d 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -78,7 +78,6 @@ class TrtYoloX
    * @param[in] build_config configuration including precision, calibration method, DLA, remaining
    * fp16 for first layer,  remaining fp16 for last layer and profiler for builder
    * @param[in] use_gpu_preprocess whether use cuda gpu for preprocessing
-   * @param[in] publish_color_mask whether publish color_mask for debugging and visualization
    * @param[in] calibration_image_list_file path for calibration files (only require for
    * quantization)
    * @param[in] norm_factor scaling factor for preprocess
@@ -91,9 +90,8 @@ class TrtYoloX
     const std::string & color_map_path, const int num_class = 8, const float score_threshold = 0.3,
     const float nms_threshold = 0.7,
     const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
-    const bool use_gpu_preprocess = false, const bool publish_color_mask = false,
-    std::string calibration_image_list_file = std::string(), const double norm_factor = 1.0,
-    [[maybe_unused]] const std::string & cache_dir = "",
+    const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
+    const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
     const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
     const size_t max_workspace_size = (1 << 30));
   /**
@@ -153,7 +151,7 @@ class TrtYoloX
    * @param[in] index multitask index
    * @param[in] colormap colormap for masks
    */
-    void getColorizedMask(
+  void getColorizedMask(
     const std::vector<tensorrt_yolox::Colormap> & colormap, const cv::Mat & mask,
     cv::Mat & colorized_mask);
   inline std::vector<Colormap> getColorMap() { return sematic_color_map_; }
@@ -313,10 +311,6 @@ class TrtYoloX
   // device buffer for argmax postprocessing  on GPU
   CudaUniquePtr<unsigned char[]> argmax_buf_d_;
   std::vector<tensorrt_yolox::Colormap> sematic_color_map_;
-  // flag whether overlay segmentation by roi
-  bool roi_overlap_segment_;
-  // flag where publish color mask for debugging and visualization
-  bool publish_color_mask_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 9bc88d89cacf6..b90d86025bca7 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -15,6 +15,9 @@
 #ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
 #define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
 
+#include "object_recognition_utils/object_recognition_utils.hpp"
+#include "utils/utils.hpp"
+
 #include <image_transport/image_transport.hpp>
 #include <opencv2/opencv.hpp>
 #include <rclcpp/rclcpp.hpp>
@@ -68,6 +71,18 @@ class TrtYoloXNode : public rclcpp::Node
   bool is_roi_overlap_segment_;
   bool is_publish_color_mask_;
   float overlap_roi_score_threshold_;
+  // TODO(badai-nguyen): change to function
+  std::map<std::string, int> remap_roi_to_semantic_ = {
+    {"UNKNOWN", 19},     // other
+    {"ANIMAL", 19},      // other
+    {"PEDESTRIAN", 11},  // person
+    {"CAR", 13},         // car
+    {"TRUCK", 14},       // truck
+    {"BUS", 15},         // bus
+    {"BICYCLE", 18},     // bicycle
+    {"MOTORBIKE", 17},   // motorcycle
+  };
+  utils::FilterTargetLabel roi_overlay_segment_labels_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 5cc2896d6522e..39e3fc65d245c 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -32,9 +32,7 @@
   <arg name="calibration_image_list_path" default="" description="Path to a file which contains path to images. Those images will be used for int8 quantization."/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
-  <arg name="is_roi_overlap_segment" default="true" description="refine segmentation mask by overlay roi class, disable when sematic segmentation accuracy is good enough"/>
-  <arg name="is_publish_color_mask" default="false" description="publish color mask for result visualization"/>
-  <arg name="overlap_roi_score_threshold" default="0.3" description="the roi object existance probability threshold that consider to replace segmentation"/>
+  <arg name="yolox_s_plus_opt_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
   <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
     <remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
     <remap from="~/output/raw_image" to="$(var input/image)"/>
@@ -47,8 +45,6 @@
     <param from="$(var yolox_param_path)" allow_substs="true"/>
     <param name="build_only" value="$(var build_only)"/>
     <param name="color_map_path" value="$(var model_path)/semseg_color_map.csv"/>
-    <param name="is_roi_overlap_segment" value="$(var is_roi_overlap_segment)"/>
-    <param name="is_publish_color_mask" value="$(var is_publish_color_mask)"/>
-    <param name="overlap_roi_score_threshold" value="$(var overlap_roi_score_threshold)"/>
+    <param from="$(var yolox_s_plus_opt_param_path)"/>
   </node>
 </launch>
diff --git a/perception/tensorrt_yolox/package.xml b/perception/tensorrt_yolox/package.xml
index 2afbfcb9fe3ed..6eb1b39202003 100644
--- a/perception/tensorrt_yolox/package.xml
+++ b/perception/tensorrt_yolox/package.xml
@@ -21,6 +21,7 @@
   <depend>autoware_auto_perception_msgs</depend>
   <depend>cuda_utils</depend>
   <depend>cv_bridge</depend>
+  <depend>detected_object_validation</depend>
   <depend>image_transport</depend>
   <depend>libopencv-dev</depend>
   <depend>object_recognition_utils</depend>
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 46cdf4cb2c7bf..97c8579f2d79a 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -157,7 +157,7 @@ namespace tensorrt_yolox
 TrtYoloX::TrtYoloX(
   const std::string & model_path, const std::string & precision, const std::string & color_map_path,
   const int num_class, const float score_threshold, const float nms_threshold,
-  tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess, bool publish_color_mask,
+  tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess,
   std::string calibration_image_list_path, const double norm_factor,
   [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
   const size_t max_workspace_size)
@@ -168,7 +168,6 @@ TrtYoloX::TrtYoloX(
   batch_size_ = batch_config[2];
   multitask_ = 0;
   sematic_color_map_ = get_seg_colormap(color_map_path);
-  publish_color_mask_ = publish_color_mask;
   if (precision == "int8") {
     if (build_config.clip_value <= 0.0) {
       if (calibration_image_list_path.empty()) {
@@ -1281,7 +1280,10 @@ void TrtYoloX::getColorizedMask(
 {
   int width = mask.cols;
   int height = mask.rows;
-  // TODO: check size of mask and cmask
+  if ((cmask.cols != mask.cols) || (cmask.rows != mask.rows)) {
+    throw std::runtime_error("input and output image have difference size.");
+    return;
+  }
   for (int y = 0; y < height; y++) {
     for (int x = 0; x < width; x++) {
       unsigned char id = mask.at<unsigned char>(y, x);
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 526d458d0fcf8..52d24877ee7ef 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -98,6 +98,17 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
   is_roi_overlap_segment_ = declare_parameter<bool>("is_roi_overlap_segment");
   is_publish_color_mask_ = declare_parameter<bool>("is_publish_color_mask");
   overlap_roi_score_threshold_ = declare_parameter<float>("overlap_roi_score_threshold");
+  roi_overlay_segment_labels_.UNKNOWN =
+    declare_parameter<bool>("roi_overlay_segment_label.UNKNOWN");
+  roi_overlay_segment_labels_.CAR = declare_parameter<bool>("roi_overlay_segment_label.CAR");
+  roi_overlay_segment_labels_.TRUCK = declare_parameter<bool>("roi_overlay_segment_label.TRUCK");
+  roi_overlay_segment_labels_.BUS = declare_parameter<bool>("roi_overlay_segment_label.BUS");
+  roi_overlay_segment_labels_.MOTORCYCLE =
+    declare_parameter<bool>("roi_overlay_segment_label.MOTORCYCLE");
+  roi_overlay_segment_labels_.BICYCLE =
+    declare_parameter<bool>("roi_overlay_segment_label.BICYCLE");
+  roi_overlay_segment_labels_.PEDESTRIAN =
+    declare_parameter<bool>("roi_overlay_segment_label.PEDESTRIAN");
   replaceLabelMap();
 
   tensorrt_common::BuildConfig build_config(
@@ -106,7 +117,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
 
   trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
     model_path, precision, color_map_path, label_map_.size(), score_threshold, nms_threshold,
-    build_config, preprocess_on_gpu, is_publish_color_mask_, calibration_image_list_path);
+    build_config, preprocess_on_gpu, calibration_image_list_path);
 
   timer_ =
     rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this));
@@ -163,6 +174,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
     return;
   }
   auto & mask = masks.at(0);
+  // TODO(badai-nguyen): change to postprocess on gpu option
   cv::resize(
     mask, mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
     cv::INTER_NEAREST);
@@ -192,6 +204,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
       overlapSegmentByRoi(yolox_object, mask);
     }
   }
+  // TODO(badai-nguyen): consider to change to 4bits data transfer
   sensor_msgs::msg::Image::SharedPtr out_mask_msg =
     cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::MONO8, mask)
       .toImageMsg();
@@ -250,23 +263,10 @@ void TrtYoloXNode::replaceLabelMap()
 
 int TrtYoloXNode::mapRoiLabel2SegLabel(const int32_t roi_label_index)
 {
-  switch (roi_label_index) {
-    case 0:
-      return 5;
-    case 1:
-      return 13;
-    case 2:
-      return 14;
-    case 3:
-      return 15;
-    case 4:
-      return 18;
-    case 5:
-      return 17;
-    case 6:
-      return 11;
-    default:
-      return -1;
+  if (roi_overlay_segment_labels_.isTarget(static_cast<uint8_t>(roi_label_index))) {
+    std::string label = label_map_[roi_label_index];
+
+    return remap_roi_to_semantic_[label];
   }
   return -1;
 }

From aafb438c4d079b12e97f6f18089a9ebffb899149 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Tue, 21 Nov 2023 01:18:04 +0900
Subject: [PATCH 12/39] docs: update readme

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/README.md | 81 +++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 21 deletions(-)

diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
index ca407b1ff6811..05d6c06208387 100644
--- a/perception/tensorrt_yolox/README.md
+++ b/perception/tensorrt_yolox/README.md
@@ -2,13 +2,13 @@
 
 ## Purpose
 
-This package detects target objects e.g., cars, trucks, bicycles, and pedestrians on a image based on [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) model.
+This package detects target objects e.g., cars, trucks, bicycles, and pedestrians with semantic segmentation header including vehicle such as cars, trucks, buses and pedestrian, building, vegetation, road, sidewalk on a image based on [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) model.
 
 ## Inner-workings / Algorithms
 
 ### Cite
 
-<!-- cspell: ignore Zheng, Songtao, Feng, Zeming, Jian -->
+<!-- cspell: ignore Zheng, Songtao, Feng, Zeming, Jian, semseg -->
 
 Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Series in 2021", arXiv preprint arXiv:2107.08430, 2021 [[ref](https://arxiv.org/abs/2107.08430)]
 
@@ -22,10 +22,12 @@ Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Se
 
 ### Output
 
-| Name          | Type                                               | Description                                        |
-| ------------- | -------------------------------------------------- | -------------------------------------------------- |
-| `out/objects` | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes        |
-| `out/image`   | `sensor_msgs/Image`                                | The image with 2D bounding boxes for visualization |
+| Name             | Type                                               | Description                                                         |
+| ---------------- | -------------------------------------------------- | ------------------------------------------------------------------- |
+| `out/objects`    | `tier4_perception_msgs/DetectedObjectsWithFeature` | The detected objects with 2D bounding boxes                         |
+| `out/image`      | `sensor_msgs/Image`                                | The image with 2D bounding boxes for visualization                  |
+| `out/mask`       | `sensor_msgs/Image`                                | The semantic segmentation mask                                      |
+| `out/color_mask` | `sensor_msgs/Image`                                | The colorized image of semantic segmentation mask for visualization |
 
 ## Parameters
 
@@ -40,20 +42,32 @@ Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Se
 
 ### Node Parameters
 
-| Name                          | Type   | Default Value | Description                                                                                                                                                                                                                              |
-| ----------------------------- | ------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model_path`                  | string | ""            | The onnx file name for yolox model                                                                                                                                                                                                       |
-| `label_path`                  | string | ""            | The label file with label names for detected objects written on it                                                                                                                                                                       |
-| `precision`                   | string | "fp16"        | The inference mode: "fp32", "fp16", "int8"                                                                                                                                                                                               |
-| `build_only`                  | bool   | false         | shutdown node after TensorRT engine file is built                                                                                                                                                                                        |
-| `calibration_algorithm`       | string | "MinMax"      | Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: Entropy",("Legacy" \| "Percentile"), "MinMax"]                                                                                            |
-| `dla_core_id`                 | int    | -1            | If positive ID value is specified, the node assign inference task to the DLA core                                                                                                                                                        |
-| `quantize_first_layer`        | bool   | false         | If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8                                                                                                              |
-| `quantize_last_layer`         | bool   | false         | If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8                                                                                                              |
-| `profile_per_layer`           | bool   | false         | If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.                                                                 |
-| `clip_value`                  | double | 0.0           | If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration |
-| `preprocess_on_gpu`           | bool   | true          | If true, pre-processing is performed on GPU                                                                                                                                                                                              |
-| `calibration_image_list_path` | string | ""            | Path to a file which contains path to images. Those images will be used for int8 quantization.                                                                                                                                           |
+| Name                                   | Type   | Default Value | Description                                                                                                                                                                                                                              |
+| -------------------------------------- | ------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model_path`                           | string | ""            | The onnx file name for yolox model                                                                                                                                                                                                       |
+| `label_path`                           | string | ""            | The label file with label names for detected objects written on it                                                                                                                                                                       |
+| `precision`                            | string | "fp16"        | The inference mode: "fp32", "fp16", "int8"                                                                                                                                                                                               |
+| `build_only`                           | bool   | false         | shutdown node after TensorRT engine file is built                                                                                                                                                                                        |
+| `calibration_algorithm`                | string | "MinMax"      | Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: Entropy",("Legacy" \| "Percentile"), "MinMax"]                                                                                            |
+| `dla_core_id`                          | int    | -1            | If positive ID value is specified, the node assign inference task to the DLA core                                                                                                                                                        |
+| `quantize_first_layer`                 | bool   | false         | If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8                                                                                                              |
+| `quantize_last_layer`                  | bool   | false         | If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8                                                                                                              |
+| `profile_per_layer`                    | bool   | false         | If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.                                                                 |
+| `clip_value`                           | double | 0.0           | If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration |
+| `preprocess_on_gpu`                    | bool   | true          | If true, pre-processing is performed on GPU                                                                                                                                                                                              |
+| `calibration_image_list_path`          | string | ""            | Path to a file which contains path to images. Those images will be used for int8 quantization.                                                                                                                                           |
+| `yolox_s_plus_opt_param_path`          | string | ""            | Path to parameter file                                                                                                                                                                                                                   |
+| `is_publish_color_mask`                | bool   | false         | If true, publish color mask for result visualization                                                                                                                                                                                     |
+| `is_roi_overlap_segment`               | bool   | true          | If true, overlay detected object roi onto semantic segmentation as roi higher priority                                                                                                                                                   |
+| `overlap_roi_score_threshold`          | float  | 0.3           | minimum existence_probability of detected roi considered to replace segmentation                                                                                                                                                         |
+| `roi_overlay_segment_label.UNKNOWN`    | bool   | true          | If true, unknown objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                             |
+| `roi_overlay_segment_label.CAR`        | bool   | true          | If true, car objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                                 |
+| `roi_overlay_segment_label.TRUCK`      | bool   | true          | If true, truck objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                               |
+| `roi_overlay_segment_label.BUS`        | bool   | true          | If true, bus objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                                 |
+| `roi_overlay_segment_label.TRAILER`    | bool   | true          | If true, trailer objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                             |
+| `roi_overlay_segment_label.MOTORCYCLE` | bool   | true          | If true, motorcycle objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                          |
+| `roi_overlay_segment_label.BICYCLE`    | bool   | true          | If true, bicycle objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                             |
+| `roi_overlay_segment_label.PEDESTRIAN` | bool   | true          | If true, pedestrian objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                          |
 
 ## Assumptions / Known limits
 
@@ -69,6 +83,31 @@ The label contained in detected 2D bounding boxes (i.e., `out/objects`) will be
 If other labels (case insensitive) are contained in the file specified via the `label_file` parameter,
 those are labeled as `UNKNOWN`, while detected rectangles are drawn in the visualization result (`out/image`).
 
+The semantic segmentation mask are gray image whose pixel is index of one of the followings:
+
+| index | semantic name |
+| ----- | ------------- |
+| 0     | road          |
+| 1     | sidewalk      |
+| 2     | building      |
+| 3     | wall          |
+| 4     | fence         |
+| 5     | pole          |
+| 6     | traffic_light |
+| 7     | traffic_sign  |
+| 8     | vegetation    |
+| 9     | terrain       |
+| 10    | sky           |
+| 11    | person        |
+| 12    | ride          |
+| 13    | car           |
+| 14    | truck         |
+| 15    | bus           |
+| 16    | train         |
+| 17    | motorcycle    |
+| 18    | bicycle       |
+| 19    | others        |
+
 ## Onnx model
 
 A sample model (named `yolox-tiny.onnx`) is downloaded by ansible script on env preparation stage, if not, please, follow [Manual downloading of artifacts](https://github.com/autowarefoundation/autoware/tree/main/ansible/roles/artifacts).
@@ -146,7 +185,7 @@ Please refer [the official document](https://github.com/Megvii-BaseDetection/YOL
 
 ## Label file
 
-A sample label file (named `label.txt`)is also downloaded automatically during env preparation process
+A sample label file (named `label.txt`) and semantic segmentation color map file (name `semseg_color_map.csv`) are also downloaded automatically during env preparation process
 (**NOTE:** This file is incompatible with models that output labels for the COCO dataset (e.g., models from the official YOLOX repository)).
 
 This file represents the correspondence between class index (integer outputted from YOLOX network) and

From c4cc56cfb713f046a80d592dcf70c46bca21d01d Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Wed, 22 May 2024 19:54:01 +0900
Subject: [PATCH 13/39] fix: update model name

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 39e3fc65d245c..80f1dcc358663 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -4,7 +4,7 @@
   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
   <arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
-  <arg name="model_name" default="yoloXP-sPlus-T4-960x960-pseudo-finetune-semseg"/>
+  <arg name="model_name" default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="model_path" default="$(var data_path)/tensorrt_yolox"/>
   <arg name="score_threshold" default="0.35"/>

From 1266414d26896061b86a663b473258e1bec3bbe4 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Wed, 22 May 2024 21:36:08 +0900
Subject: [PATCH 14/39] fix: add utils into tensorrt_yolox

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    |  4 +-
 .../include/tensorrt_yolox/utils.hpp          | 39 +++++++++++++++++++
 .../src/tensorrt_yolox_node.cpp               |  2 +-
 perception/tensorrt_yolox/src/utils.cpp       | 33 ++++++++++++++++
 4 files changed, 75 insertions(+), 3 deletions(-)
 create mode 100644 perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp
 create mode 100644 perception/tensorrt_yolox/src/utils.cpp

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index b90d86025bca7..77c12404c7f47 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -16,7 +16,7 @@
 #define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
 
 #include "object_recognition_utils/object_recognition_utils.hpp"
-#include "utils/utils.hpp"
+#include "tensorrt_yolox/utils.hpp"
 
 #include <image_transport/image_transport.hpp>
 #include <opencv2/opencv.hpp>
@@ -82,7 +82,7 @@ class TrtYoloXNode : public rclcpp::Node
     {"BICYCLE", 18},     // bicycle
     {"MOTORBIKE", 17},   // motorcycle
   };
-  utils::FilterTargetLabel roi_overlay_segment_labels_;
+  utils::RoiOverlaySegmenLabel roi_overlay_segment_labels_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp
new file mode 100644
index 0000000000000..aa651f74e76ec
--- /dev/null
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp
@@ -0,0 +1,39 @@
+// Copyright 2023 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORRT_YOLOX__UTILS_HPP_
+#define TENSORRT_YOLOX__UTILS_HPP_
+
+#include <cstdint>
+
+namespace tensorrt_yolox
+{
+namespace utils
+{
+struct RoiOverlaySegmenLabel
+{
+  bool UNKNOWN;
+  bool CAR;
+  bool TRUCK;
+  bool BUS;
+  bool TRAILER;
+  bool MOTORCYCLE;
+  bool BICYCLE;
+  bool PEDESTRIAN;
+  bool isOverlay(const uint8_t label) const;
+};  // struct RoiOverlaySegmenLabel
+}  // namespace utils
+}  // namespace tensorrt_yolox
+
+#endif  // TENSORRT_YOLOX__UTILS_HPP_
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 52d24877ee7ef..beb4715058b58 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -263,7 +263,7 @@ void TrtYoloXNode::replaceLabelMap()
 
 int TrtYoloXNode::mapRoiLabel2SegLabel(const int32_t roi_label_index)
 {
-  if (roi_overlay_segment_labels_.isTarget(static_cast<uint8_t>(roi_label_index))) {
+  if (roi_overlay_segment_labels_.isOverlay(static_cast<uint8_t>(roi_label_index))) {
     std::string label = label_map_[roi_label_index];
 
     return remap_roi_to_semantic_[label];
diff --git a/perception/tensorrt_yolox/src/utils.cpp b/perception/tensorrt_yolox/src/utils.cpp
new file mode 100644
index 0000000000000..bd8850436a991
--- /dev/null
+++ b/perception/tensorrt_yolox/src/utils.cpp
@@ -0,0 +1,33 @@
+// Copyright 2023 TIER IV, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tensorrt_yolox/utils.hpp"
+
+#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
+
+namespace tensorrt_yolox
+{
+namespace utils
+{
+using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
+
+bool RoiOverlaySegmenLabel::isOverlay(const uint8_t label) const
+{
+  return (label == Label::UNKNOWN && UNKNOWN) || (label == Label::CAR && CAR) ||
+         (label == Label::TRUCK && TRUCK) || (label == Label::BUS && BUS) ||
+         (label == Label::TRAILER && TRAILER) || (label == Label::MOTORCYCLE && MOTORCYCLE) ||
+         (label == Label::BICYCLE && BICYCLE) || (label == Label::PEDESTRIAN && PEDESTRIAN);
+}
+}  // namespace utils
+}  // namespace tensorrt_yolox

From 3488abbf1d2e371d9868536d07bb43f3bf0acbb3 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Wed, 22 May 2024 21:36:33 +0900
Subject: [PATCH 15/39] fix: launch file

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 80f1dcc358663..327e4ae7a7d59 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -6,6 +6,7 @@
   <arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
   <arg name="model_name" default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
+  <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
   <arg name="model_path" default="$(var data_path)/tensorrt_yolox"/>
   <arg name="score_threshold" default="0.35"/>
   <arg name="nms_threshold" default="0.7"/>
@@ -32,7 +33,7 @@
   <arg name="calibration_image_list_path" default="" description="Path to a file which contains path to images. Those images will be used for int8 quantization."/>
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
-  <arg name="yolox_s_plus_opt_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
+
   <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
     <remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
     <remap from="~/output/raw_image" to="$(var input/image)"/>
@@ -45,6 +46,5 @@
     <param from="$(var yolox_param_path)" allow_substs="true"/>
     <param name="build_only" value="$(var build_only)"/>
     <param name="color_map_path" value="$(var model_path)/semseg_color_map.csv"/>
-    <param from="$(var yolox_s_plus_opt_param_path)"/>
   </node>
 </launch>

From b996b69a6182f8cc1557e8584c80d9d515d95ebb Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Thu, 23 May 2024 09:12:22 +0900
Subject: [PATCH 16/39] chore: remove unnecessary depend

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/package.xml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/perception/tensorrt_yolox/package.xml b/perception/tensorrt_yolox/package.xml
index 6eb1b39202003..2afbfcb9fe3ed 100644
--- a/perception/tensorrt_yolox/package.xml
+++ b/perception/tensorrt_yolox/package.xml
@@ -21,7 +21,6 @@
   <depend>autoware_auto_perception_msgs</depend>
   <depend>cuda_utils</depend>
   <depend>cv_bridge</depend>
-  <depend>detected_object_validation</depend>
   <depend>image_transport</depend>
   <depend>libopencv-dev</depend>
   <depend>object_recognition_utils</depend>

From ffe2f7a7461a075d6352151c52d638f1bd18c5a6 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Thu, 23 May 2024 09:22:09 +0900
Subject: [PATCH 17/39] chore: fix yaml file

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index d94f4ca004ee3..c3bcbc98af705 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -18,8 +18,7 @@
       MOTORCYCLE : true
       BICYCLE : true
       PEDESTRIAN : true
-/**:
-  ros__parameters:
+
     model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
     label_path: "$(var data_path)/tensorrt_yolox/label.txt"
     score_threshold: 0.35

From 126262bcd91b422259ed3cffd98b246766fb47d3 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Thu, 23 May 2024 09:49:09 +0900
Subject: [PATCH 18/39] chore: remove duplicated param in launch

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../config/yolox_s_plus_opt.param.yaml        |  1 +
 .../launch/yolox_s_plus_opt.launch.xml        | 27 +------------------
 2 files changed, 2 insertions(+), 26 deletions(-)

diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index c3bcbc98af705..bece42d497963 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -21,6 +21,7 @@
 
     model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
     label_path: "$(var data_path)/tensorrt_yolox/label.txt"
+    color_map_path: $(var data_path)/tensorrt_yolox/semseg_color_map.csv"/>
     score_threshold: 0.35
     nms_threshold: 0.7
     precision: "int8" # Operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8].
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 327e4ae7a7d59..82a4b45f51018 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -7,31 +7,7 @@
   <arg name="model_name" default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
-  <arg name="model_path" default="$(var data_path)/tensorrt_yolox"/>
-  <arg name="score_threshold" default="0.35"/>
-  <arg name="nms_threshold" default="0.7"/>
-  <arg name="precision" default="int8" description="operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8]"/>
-  <arg
-    name="calibration_algorithm"
-    default="Entropy"
-    description="Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: [Entropy, (Legacy | Percentile), MinMax]"
-  />
-  <arg name="dla_core_id" default="-1" description="If positive ID value is specified, the node assign inference task to the DLA core"/>
-  <arg name="quantize_first_layer" default="false" description="If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8"/>
-  <arg name="quantize_last_layer" default="false" description="If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8"/>
-  <arg
-    name="profile_per_layer"
-    default="false"
-    description="If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose."
-  />
-  <arg
-    name="clip_value"
-    default="6.0"
-    description="If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration."
-  />
-  <arg name="preprocess_on_gpu" default="true" description="If true, pre-processing is performed on GPU"/>
-  <arg name="calibration_image_list_path" default="" description="Path to a file which contains path to images. Those images will be used for int8 quantization."/>
-  <arg name="use_decompress" default="true" description="use image decompress"/>
+git   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
 
   <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
@@ -45,6 +21,5 @@
     <remap from="~/out/mask" to="$(var output/mask)"/>
     <param from="$(var yolox_param_path)" allow_substs="true"/>
     <param name="build_only" value="$(var build_only)"/>
-    <param name="color_map_path" value="$(var model_path)/semseg_color_map.csv"/>
   </node>
 </launch>

From 84da29c972aa0637153475876705dc60dd08e4b1 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 24 May 2024 16:19:07 +0900
Subject: [PATCH 19/39] fix: semantic class

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../config/yolox_s_plus_opt.param.yaml        | 11 ++---
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    | 41 ++++++++++++++-----
 .../include/tensorrt_yolox/utils.hpp          | 39 ------------------
 .../launch/yolox_s_plus_opt.launch.xml        |  2 +-
 .../src/tensorrt_yolox_node.cpp               |  1 +
 perception/tensorrt_yolox/src/utils.cpp       | 33 ---------------
 6 files changed, 38 insertions(+), 89 deletions(-)
 delete mode 100644 perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp
 delete mode 100644 perception/tensorrt_yolox/src/utils.cpp

diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index bece42d497963..70d7ecc315be9 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -8,20 +8,21 @@
     overlap_roi_score_threshold: 0.3
 
     # publish color mask for result visualization
-    is_publish_color_mask: false
+    is_publish_color_mask: true
 
     roi_overlay_segment_label:
       UNKNOWN : true
-      CAR : true
-      TRUCK : true
-      BUS : true
+      CAR : false
+      TRUCK : false
+      BUS : false
       MOTORCYCLE : true
       BICYCLE : true
       PEDESTRIAN : true
+      ANIMAL: true
 
     model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
     label_path: "$(var data_path)/tensorrt_yolox/label.txt"
-    color_map_path: $(var data_path)/tensorrt_yolox/semseg_color_map.csv"/>
+    color_map_path: "$(var data_path)/tensorrt_yolox/semseg_color_map.csv"
     score_threshold: 0.35
     nms_threshold: 0.7
     precision: "int8" # Operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8].
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 77c12404c7f47..e4b66af7213b1 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -16,7 +16,6 @@
 #define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
 
 #include "object_recognition_utils/object_recognition_utils.hpp"
-#include "tensorrt_yolox/utils.hpp"
 
 #include <image_transport/image_transport.hpp>
 #include <opencv2/opencv.hpp>
@@ -26,6 +25,7 @@
 #include <sensor_msgs/msg/image.hpp>
 #include <std_msgs/msg/header.hpp>
 #include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
+#include <tier4_perception_msgs/msg/semantic.hpp>
 
 #if __has_include(<cv_bridge/cv_bridge.hpp>)
 #include <cv_bridge/cv_bridge.hpp>
@@ -43,9 +43,28 @@
 namespace tensorrt_yolox
 {
 using LabelMap = std::map<int, std::string>;
-
+using Label = tier4_perception_msgs::msg::Semantic;
 class TrtYoloXNode : public rclcpp::Node
 {
+  struct RoiOverlaySegmenLabel
+  {
+    bool UNKNOWN;
+    bool CAR;
+    bool TRUCK;
+    bool BUS;
+    bool MOTORCYCLE;
+    bool BICYCLE;
+    bool PEDESTRIAN;
+    bool ANIMAL;
+    bool isOverlay(const uint8_t label) const
+    {
+      return (label == Label::UNKNOWN && UNKNOWN) || (label == Label::CAR && CAR) ||
+             (label == Label::TRUCK && TRUCK) || (label == Label::BUS && BUS) ||
+             (label == Label::ANIMAL && ANIMAL) || (label == Label::MOTORBIKE && MOTORCYCLE) ||
+             (label == Label::BICYCLE && BICYCLE) || (label == Label::PEDESTRIAN && PEDESTRIAN);
+    };
+  };  // struct RoiOverlaySegmenLabel
+
 public:
   explicit TrtYoloXNode(const rclcpp::NodeOptions & node_options);
 
@@ -73,16 +92,16 @@ class TrtYoloXNode : public rclcpp::Node
   float overlap_roi_score_threshold_;
   // TODO(badai-nguyen): change to function
   std::map<std::string, int> remap_roi_to_semantic_ = {
-    {"UNKNOWN", 19},     // other
-    {"ANIMAL", 19},      // other
-    {"PEDESTRIAN", 11},  // person
-    {"CAR", 13},         // car
-    {"TRUCK", 14},       // truck
-    {"BUS", 15},         // bus
-    {"BICYCLE", 18},     // bicycle
-    {"MOTORBIKE", 17},   // motorcycle
+    {"UNKNOWN", 3},     // other
+    {"ANIMAL", 0},      // other
+    {"PEDESTRIAN", 6},  // person
+    {"CAR", 7},         // car
+    {"TRUCK", 7},       // truck
+    {"BUS", 7},         // bus
+    {"BICYCLE", 8},     // bicycle
+    {"MOTORBIKE", 8},   // motorcycle
   };
-  utils::RoiOverlaySegmenLabel roi_overlay_segment_labels_;
+  RoiOverlaySegmenLabel roi_overlay_segment_labels_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp
deleted file mode 100644
index aa651f74e76ec..0000000000000
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/utils.hpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2023 TIER IV, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef TENSORRT_YOLOX__UTILS_HPP_
-#define TENSORRT_YOLOX__UTILS_HPP_
-
-#include <cstdint>
-
-namespace tensorrt_yolox
-{
-namespace utils
-{
-struct RoiOverlaySegmenLabel
-{
-  bool UNKNOWN;
-  bool CAR;
-  bool TRUCK;
-  bool BUS;
-  bool TRAILER;
-  bool MOTORCYCLE;
-  bool BICYCLE;
-  bool PEDESTRIAN;
-  bool isOverlay(const uint8_t label) const;
-};  // struct RoiOverlaySegmenLabel
-}  // namespace utils
-}  // namespace tensorrt_yolox
-
-#endif  // TENSORRT_YOLOX__UTILS_HPP_
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 82a4b45f51018..9df08ef6dd7db 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -7,7 +7,7 @@
   <arg name="model_name" default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"/>
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
-git   <arg name="use_decompress" default="true" description="use image decompress"/>
+  <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
 
   <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index beb4715058b58..446084b067af7 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -109,6 +109,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     declare_parameter<bool>("roi_overlay_segment_label.BICYCLE");
   roi_overlay_segment_labels_.PEDESTRIAN =
     declare_parameter<bool>("roi_overlay_segment_label.PEDESTRIAN");
+  roi_overlay_segment_labels_.ANIMAL = declare_parameter<bool>("roi_overlay_segment_label.ANIMAL");
   replaceLabelMap();
 
   tensorrt_common::BuildConfig build_config(
diff --git a/perception/tensorrt_yolox/src/utils.cpp b/perception/tensorrt_yolox/src/utils.cpp
deleted file mode 100644
index bd8850436a991..0000000000000
--- a/perception/tensorrt_yolox/src/utils.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Copyright 2023 TIER IV, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "tensorrt_yolox/utils.hpp"
-
-#include <autoware_auto_perception_msgs/msg/object_classification.hpp>
-
-namespace tensorrt_yolox
-{
-namespace utils
-{
-using Label = autoware_auto_perception_msgs::msg::ObjectClassification;
-
-bool RoiOverlaySegmenLabel::isOverlay(const uint8_t label) const
-{
-  return (label == Label::UNKNOWN && UNKNOWN) || (label == Label::CAR && CAR) ||
-         (label == Label::TRUCK && TRUCK) || (label == Label::BUS && BUS) ||
-         (label == Label::TRAILER && TRAILER) || (label == Label::MOTORCYCLE && MOTORCYCLE) ||
-         (label == Label::BICYCLE && BICYCLE) || (label == Label::PEDESTRIAN && PEDESTRIAN);
-}
-}  // namespace utils
-}  // namespace tensorrt_yolox

From 72de1e0e2f0e4aed86f9afe426390071fc26b461 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 27 May 2024 10:16:32 +0900
Subject: [PATCH 20/39] docs: update readme

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/README.md | 65 ++++++++++++++---------------
 1 file changed, 31 insertions(+), 34 deletions(-)

diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
index 05d6c06208387..751aa2e14bce5 100644
--- a/perception/tensorrt_yolox/README.md
+++ b/perception/tensorrt_yolox/README.md
@@ -2,7 +2,7 @@
 
 ## Purpose
 
-This package detects target objects e.g., cars, trucks, bicycles, and pedestrians with semantic segmentation header including vehicle such as cars, trucks, buses and pedestrian, building, vegetation, road, sidewalk on a image based on [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) model.
+This package detects target objects e.g., cars, trucks, bicycles, and pedestrians and segment target objects such as cars, trucks, buses and pedestrian, building, vegetation, road, sidewalk on a image based on [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX) model with multi-header structure.
 
 ## Inner-workings / Algorithms
 
@@ -58,16 +58,16 @@ Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Se
 | `calibration_image_list_path`          | string | ""            | Path to a file which contains path to images. Those images will be used for int8 quantization.                                                                                                                                           |
 | `yolox_s_plus_opt_param_path`          | string | ""            | Path to parameter file                                                                                                                                                                                                                   |
 | `is_publish_color_mask`                | bool   | false         | If true, publish color mask for result visualization                                                                                                                                                                                     |
-| `is_roi_overlap_segment`               | bool   | true          | If true, overlay detected object roi onto semantic segmentation as roi higher priority                                                                                                                                                   |
+| `is_roi_overlap_segment`               | bool   | true          | If true, overlay detected object roi onto semantic segmentation to avoid over-filtering pointcloud especially small size objects                                                                                                         |
 | `overlap_roi_score_threshold`          | float  | 0.3           | minimum existence_probability of detected roi considered to replace segmentation                                                                                                                                                         |
-| `roi_overlay_segment_label.UNKNOWN`    | bool   | true          | If true, unknown objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                             |
-| `roi_overlay_segment_label.CAR`        | bool   | true          | If true, car objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                                 |
-| `roi_overlay_segment_label.TRUCK`      | bool   | true          | If true, truck objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                               |
-| `roi_overlay_segment_label.BUS`        | bool   | true          | If true, bus objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                                 |
-| `roi_overlay_segment_label.TRAILER`    | bool   | true          | If true, trailer objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                             |
-| `roi_overlay_segment_label.MOTORCYCLE` | bool   | true          | If true, motorcycle objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                          |
-| `roi_overlay_segment_label.BICYCLE`    | bool   | true          | If true, bicycle objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                             |
-| `roi_overlay_segment_label.PEDESTRIAN` | bool   | true          | If true, pedestrian objects roi will be overlay onto sematic segmentation mask.                                                                                                                                                          |
+| `roi_overlay_segment_label.UNKNOWN`    | bool   | true          | If true, unknown objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                            |
+| `roi_overlay_segment_label.CAR`        | bool   | false         | If true, car objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                |
+| `roi_overlay_segment_label.TRUCK`      | bool   | false         | If true, truck objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                              |
+| `roi_overlay_segment_label.BUS`        | bool   | false         | If true, bus objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                |
+| `roi_overlay_segment_label.MOTORCYCLE` | bool   | true          | If true, motorcycle objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                         |
+| `roi_overlay_segment_label.BICYCLE`    | bool   | true          | If true, bicycle objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                            |
+| `roi_overlay_segment_label.PEDESTRIAN` | bool   | true          | If true, pedestrian objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                         |
+| `roi_overlay_segment_label.ANIMAL`     | bool   | true          | If true, animal objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                             |
 
 ## Assumptions / Known limits
 
@@ -85,28 +85,24 @@ those are labeled as `UNKNOWN`, while detected rectangles are drawn in the visua
 
 The semantic segmentation mask are gray image whose pixel is index of one of the followings:
 
-| index | semantic name |
-| ----- | ------------- |
-| 0     | road          |
-| 1     | sidewalk      |
-| 2     | building      |
-| 3     | wall          |
-| 4     | fence         |
-| 5     | pole          |
-| 6     | traffic_light |
-| 7     | traffic_sign  |
-| 8     | vegetation    |
-| 9     | terrain       |
-| 10    | sky           |
-| 11    | person        |
-| 12    | ride          |
-| 13    | car           |
-| 14    | truck         |
-| 15    | bus           |
-| 16    | train         |
-| 17    | motorcycle    |
-| 18    | bicycle       |
-| 19    | others        |
+| index | semantic name    |
+| ----- | ---------------- |
+| 0     | road             |
+| 1     | building         |
+| 2     | wall             |
+| 3     | obstacle         |
+| 4     | traffic_light    |
+| 5     | traffic_sign     |
+| 6     | person           |
+| 7     | vehicle          |
+| 8     | bike             |
+| 9     | road             |
+| 10    | sidewalk         |
+| 11    | roadPaint        |
+| 12    | curbstone        |
+| 13    | crosswalk_others |
+| 14    | vegetation       |
+| 15    | sky              |
 
 ## Onnx model
 
@@ -118,11 +114,12 @@ hence these parameters are ignored when users specify ONNX models including this
 
 This package accepts both `EfficientNMS_TRT` attached ONNXs and [models published from the official YOLOX repository](https://github.com/Megvii-BaseDetection/YOLOX/tree/main/demo/ONNXRuntime#download-onnx-models) (we referred to them as "plain" models).
 
-In addition to `yolox-tiny.onnx`, a custom model named `yolox-sPlus-opt.onnx` is either available.
-This model is based on YOLOX-s and tuned to perform more accurate detection with almost comparable execution speed with `yolox-tiny`.
+In addition to `yolox-tiny.onnx`, a custom model named `yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls` is either available.
+This model is multi-header structure model which is based on YOLOX-s and tuned to perform more accurate detection with almost comparable execution speed with `yolox-tiny`.
 To get better results with this model, users are recommended to use some specific running arguments
 such as `precision:=int8`, `calibration_algorithm:=Entropy`, `clip_value:=6.0`.
 Users can refer `launch/yolox_sPlus_opt.launch.xml` to see how this model can be used.
+Beside detection result, this model also output image semantic segmentation result for pointcloud filtering purpose.
 
 All models are automatically converted to TensorRT format.
 These converted files will be saved in the same directory as specified ONNX files

From 69cc160d26082496fbcc466d37a51cbb4b244a19 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 27 May 2024 10:17:12 +0900
Subject: [PATCH 21/39] chore: update default param

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index 70d7ecc315be9..f7c228b51f6d8 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -8,7 +8,7 @@
     overlap_roi_score_threshold: 0.3
 
     # publish color mask for result visualization
-    is_publish_color_mask: true
+    is_publish_color_mask: false
 
     roi_overlay_segment_label:
       UNKNOWN : true

From d8e6ae7accc1ee4e07bec15f81c1d3ccbf2d568d Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 27 May 2024 10:41:17 +0900
Subject: [PATCH 22/39] fix: add processing time topic

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    |  4 +++
 .../src/tensorrt_yolox_node.cpp               | 25 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index e4b66af7213b1..8c9e113559384 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -21,6 +21,8 @@
 #include <opencv2/opencv.hpp>
 #include <rclcpp/rclcpp.hpp>
 #include <tensorrt_yolox/tensorrt_yolox.hpp>
+#include <tier4_autoware_utils/ros/debug_publisher.hpp>
+#include <tier4_autoware_utils/system/stop_watch.hpp>
 
 #include <sensor_msgs/msg/image.hpp>
 #include <std_msgs/msg/header.hpp>
@@ -102,6 +104,8 @@ class TrtYoloXNode : public rclcpp::Node
     {"MOTORBIKE", 8},   // motorcycle
   };
   RoiOverlaySegmenLabel roi_overlay_segment_labels_;
+  std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_;
+  std::unique_ptr<tier4_autoware_utils::DebugPublisher> debug_publisher_;
 };
 
 }  // namespace tensorrt_yolox
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 446084b067af7..d1130533c218f 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -29,6 +29,14 @@ namespace tensorrt_yolox
 TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
 : Node("tensorrt_yolox", node_options)
 {
+  {
+    stop_watch_ptr_ =
+      std::make_unique<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>>();
+    debug_publisher_ =
+      std::make_unique<tier4_autoware_utils::DebugPublisher>(this, "tensorrt_yolox");
+    stop_watch_ptr_->tic("cyclic_time");
+    stop_watch_ptr_->tic("processing_time");
+  }
   using std::placeholders::_1;
   using std::chrono_literals::operator""ms;
 
@@ -153,6 +161,7 @@ void TrtYoloXNode::onConnect()
 
 void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
 {
+  stop_watch_ptr_->toc("processing_time", true);
   tier4_perception_msgs::msg::DetectedObjectsWithFeature out_objects;
 
   cv_bridge::CvImagePtr in_image_ptr;
@@ -216,6 +225,22 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
   out_objects.header = msg->header;
   objects_pub_->publish(out_objects);
 
+  if (debug_publisher_) {
+    const double processing_time_ms = stop_watch_ptr_->toc("processing_time", true);
+    const double cyclic_time_ms = stop_watch_ptr_->toc("cyclic_time", true);
+    const double pipeline_latency_ms =
+      std::chrono::duration<double, std::milli>(
+        std::chrono::nanoseconds(
+          (this->get_clock()->now() - out_objects.header.stamp).nanoseconds()))
+        .count();
+    debug_publisher_->publish<tier4_debug_msgs::msg::Float64Stamped>(
+      "debug/cyclic_time_ms", cyclic_time_ms);
+    debug_publisher_->publish<tier4_debug_msgs::msg::Float64Stamped>(
+      "debug/processing_time_ms", processing_time_ms);
+    debug_publisher_->publish<tier4_debug_msgs::msg::Float64Stamped>(
+      "debug/pipeline_latency_ms", pipeline_latency_ms);
+  }
+
   if (is_publish_color_mask_) {
     cv::Mat color_mask =
       cv::Mat::zeros(in_image_ptr->image.rows, in_image_ptr->image.cols, CV_8UC3);

From ea0d115fc47144bcdbca61029de1d85f5537c5d3 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Thu, 30 May 2024 08:43:16 +0900
Subject: [PATCH 23/39] chore: typo

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/README.md              | 2 +-
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
index 751aa2e14bce5..ee475904ae0a4 100644
--- a/perception/tensorrt_yolox/README.md
+++ b/perception/tensorrt_yolox/README.md
@@ -83,7 +83,7 @@ The label contained in detected 2D bounding boxes (i.e., `out/objects`) will be
 If other labels (case insensitive) are contained in the file specified via the `label_file` parameter,
 those are labeled as `UNKNOWN`, while detected rectangles are drawn in the visualization result (`out/image`).
 
-The semantic segmentation mask are gray image whose pixel is index of one of the followings:
+The semantic segmentation mask is a gray image whose each pixel is index of one following class:
 
 | index | semantic name    |
 | ----- | ---------------- |
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 97c8579f2d79a..58a858a9e265b 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -916,7 +916,6 @@ bool TrtYoloX::feedforwardAndDecode(
 
   for (size_t i = 0; i < batch_size; ++i) {
     auto image_size = images[i].size();
-    auto & out_mask = out_masks[i];
     float * batch_prob = out_prob_h_.get() + (i * out_elem_num_per_batch_);
     ObjectArray object_array;
     decodeOutputs(batch_prob, object_array, scales_[i], image_size);
@@ -948,14 +947,14 @@ bool TrtYoloX::feedforwardAndDecode(
         } else {
           mask = getMaskImage(&(segmentation_results[counter]), output_dims, out_w, out_h);
         }
-        segmentation_masks_.push_back(mask);
+        segmentation_masks_.emplace_back(std::move(mask));
         counter += out_elem_num;
       }
+      // semantic segmentation was fixed as first task
+      out_masks.at(i) = segmentation_masks_.at(0);
     } else {
       continue;
     }
-    // Assume semantic segmentation is first task
-    out_mask = segmentation_masks_.at(0);
   }
   return true;
 }

From 42f2101edfd475249c87fc55cca5b0208b38949d Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 29 May 2024 23:47:45 +0000
Subject: [PATCH 24/39] style(pre-commit): autofix

---
 perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index b8f9683a2d460..d1130533c218f 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -251,7 +251,6 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
     output_color_mask_msg->header = msg->header;
     color_mask_pub_.publish(output_color_mask_msg);
   }
-
 }
 
 bool TrtYoloXNode::readLabelFile(const std::string & label_path)

From c5d0e1eadd290941d12b1405c0a2cd7b6baa452a Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 7 Jun 2024 11:11:57 +0900
Subject: [PATCH 25/39] chore: fix cspell error

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../tensorrt_yolox/config/yolox_s_plus_opt.param.yaml      | 1 +
 .../include/tensorrt_yolox/tensorrt_yolox_node.hpp         | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
index f7c228b51f6d8..57c1b40c44a4d 100644
--- a/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
+++ b/perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml
@@ -1,3 +1,4 @@
+# cspell: ignore semseg
 /**:
   ros__parameters:
     # refine segmentation mask by overlay roi class
diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index 8c9e113559384..6c6ba098e7b71 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -44,11 +44,12 @@
 
 namespace tensorrt_yolox
 {
+// cspell: ignore Semseg
 using LabelMap = std::map<int, std::string>;
 using Label = tier4_perception_msgs::msg::Semantic;
 class TrtYoloXNode : public rclcpp::Node
 {
-  struct RoiOverlaySegmenLabel
+  struct RoiOverlaySemsegLabel
   {
     bool UNKNOWN;
     bool CAR;
@@ -65,7 +66,7 @@ class TrtYoloXNode : public rclcpp::Node
              (label == Label::ANIMAL && ANIMAL) || (label == Label::MOTORBIKE && MOTORCYCLE) ||
              (label == Label::BICYCLE && BICYCLE) || (label == Label::PEDESTRIAN && PEDESTRIAN);
     };
-  };  // struct RoiOverlaySegmenLabel
+  };  // struct RoiOverlaySemsegLabel
 
 public:
   explicit TrtYoloXNode(const rclcpp::NodeOptions & node_options);
@@ -103,7 +104,7 @@ class TrtYoloXNode : public rclcpp::Node
     {"BICYCLE", 8},     // bicycle
     {"MOTORBIKE", 8},   // motorcycle
   };
-  RoiOverlaySegmenLabel roi_overlay_segment_labels_;
+  RoiOverlaySemsegLabel roi_overlay_segment_labels_;
   std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_;
   std::unique_ptr<tier4_autoware_utils::DebugPublisher> debug_publisher_;
 };

From 552922760cf38d50c4819efcf3990e00986c7311 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Tue, 11 Jun 2024 11:11:07 +0900
Subject: [PATCH 26/39] fix: yolox default param

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../include/tensorrt_yolox/tensorrt_yolox.hpp          |  7 +++----
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp       |  9 ++++-----
 perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp  | 10 ++++++++--
 .../src/yolox_single_image_inference_node.cpp          |  5 +----
 4 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
index f5c132761a85d..d287c8a44d4cf 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp
@@ -86,14 +86,13 @@ class TrtYoloX
    * @param[in] max_workspace_size maximum workspace for building TensorRT engine
    */
   TrtYoloX(
-    const std::string & model_path, const std::string & precision,
-    const std::string & color_map_path, const int num_class = 8, const float score_threshold = 0.3,
-    const float nms_threshold = 0.7,
+    const std::string & model_path, const std::string & precision, const int num_class = 8,
+    const float score_threshold = 0.3, const float nms_threshold = 0.7,
     const tensorrt_common::BuildConfig build_config = tensorrt_common::BuildConfig(),
     const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
     const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
     const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
-    const size_t max_workspace_size = (1 << 30));
+    const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = "");
   /**
    * @brief Deconstruct TrtYoloX
    */
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 58a858a9e265b..0cdc24a0bade2 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -155,12 +155,11 @@ std::vector<tensorrt_yolox::Colormap> get_seg_colormap(const std::string & filen
 namespace tensorrt_yolox
 {
 TrtYoloX::TrtYoloX(
-  const std::string & model_path, const std::string & precision, const std::string & color_map_path,
-  const int num_class, const float score_threshold, const float nms_threshold,
-  tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess,
-  std::string calibration_image_list_path, const double norm_factor,
+  const std::string & model_path, const std::string & precision, const int num_class,
+  const float score_threshold, const float nms_threshold, tensorrt_common::BuildConfig build_config,
+  const bool use_gpu_preprocess, std::string calibration_image_list_path, const double norm_factor,
   [[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
-  const size_t max_workspace_size)
+  const size_t max_workspace_size, const std::string & color_map_path)
 {
   src_width_ = -1;
   src_height_ = -1;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 03b4ff67699bd..52c3fbe1ba787 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -124,9 +124,15 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     calibration_algorithm, dla_core_id, quantize_first_layer, quantize_last_layer,
     profile_per_layer, clip_value);
 
+  const double norm_factor = 1.0;
+  const std::string cache_dir = "";
+  const tensorrt_common::BatchConfig batch_config{1, 1, 1};
+  const size_t max_workspace_size = (1 << 30);
+
   trt_yolox_ = std::make_unique<tensorrt_yolox::TrtYoloX>(
-    model_path, precision, color_map_path, label_map_.size(), score_threshold, nms_threshold,
-    build_config, preprocess_on_gpu, calibration_image_list_path);
+    model_path, precision, label_map_.size(), score_threshold, nms_threshold, build_config,
+    preprocess_on_gpu, calibration_image_list_path, norm_factor, cache_dir, batch_config,
+    max_workspace_size, color_map_path);
 
   timer_ =
     rclcpp::create_timer(this, get_clock(), 100ms, std::bind(&TrtYoloXNode::onConnect, this));
diff --git a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
index 46170d85ea1cc..360f41e470e38 100644
--- a/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
+++ b/perception/tensorrt_yolox/src/yolox_single_image_inference_node.cpp
@@ -43,11 +43,8 @@ class YoloXSingleImageInferenceNode : public rclcpp::Node
     p.replace_extension("");
     const auto output_image_path =
       declare_parameter("output_image_path", p.string() + "_detect" + ext);
-    std::string color_map_path = declare_parameter("color_map_path", "");
-    // seg_cmap_ = get_seg_colormap(color_map_path);
 
-    auto trt_yolox =
-      std::make_unique<tensorrt_yolox::TrtYoloX>(model_path, precision, color_map_path);
+    auto trt_yolox = std::make_unique<tensorrt_yolox::TrtYoloX>(model_path, precision);
     auto image = cv::imread(image_path);
     tensorrt_yolox::ObjectArrays objects;
     std::vector<cv::Mat> masks;

From 90bdb4213c283b99be9fac143cbfe2d6d0247a18 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Tue, 11 Jun 2024 12:55:20 +0900
Subject: [PATCH 27/39] chore: rename debug topics

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 52c3fbe1ba787..00c82c722554a 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -33,7 +33,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     stop_watch_ptr_ =
       std::make_unique<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>>();
     debug_publisher_ =
-      std::make_unique<tier4_autoware_utils::DebugPublisher>(this, "tensorrt_yolox");
+      std::make_unique<tier4_autoware_utils::DebugPublisher>(this, this->get_name());
     stop_watch_ptr_->tic("cyclic_time");
     stop_watch_ptr_->tic("processing_time");
   }

From d70ca5817df17aff2350fa49ea5c5d17a3505db5 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 21 Jun 2024 13:11:31 +0900
Subject: [PATCH 28/39] chore: rename debug topics

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index cf32e0af4a4b2..f14ccbed5dc53 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -33,7 +33,7 @@ TrtYoloXNode::TrtYoloXNode(const rclcpp::NodeOptions & node_options)
     stop_watch_ptr_ =
       std::make_unique<autoware::universe_utils::StopWatch<std::chrono::milliseconds>>();
     debug_publisher_ =
-      std::make_unique<autoware::universe_utils::DebugPublisher>(this, "tensorrt_yolox");
+      std::make_unique<autoware::universe_utils::DebugPublisher>(this, this->get_name());
     stop_watch_ptr_->tic("cyclic_time");
     stop_watch_ptr_->tic("processing_time");
   }

From 1a2e2f78ba62da9e3ba05ac65cf667d075c3ec25 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 21 Jun 2024 13:28:10 +0900
Subject: [PATCH 29/39] docs: update model description

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/README.md           | 53 ++++++++++---------
 .../launch/yolox_s_plus_opt.launch.xml        |  6 ++-
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
index ee475904ae0a4..96041448cbd65 100644
--- a/perception/tensorrt_yolox/README.md
+++ b/perception/tensorrt_yolox/README.md
@@ -42,32 +42,33 @@ Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Se
 
 ### Node Parameters
 
-| Name                                   | Type   | Default Value | Description                                                                                                                                                                                                                              |
-| -------------------------------------- | ------ | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model_path`                           | string | ""            | The onnx file name for yolox model                                                                                                                                                                                                       |
-| `label_path`                           | string | ""            | The label file with label names for detected objects written on it                                                                                                                                                                       |
-| `precision`                            | string | "fp16"        | The inference mode: "fp32", "fp16", "int8"                                                                                                                                                                                               |
-| `build_only`                           | bool   | false         | shutdown node after TensorRT engine file is built                                                                                                                                                                                        |
-| `calibration_algorithm`                | string | "MinMax"      | Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: Entropy",("Legacy" \| "Percentile"), "MinMax"]                                                                                            |
-| `dla_core_id`                          | int    | -1            | If positive ID value is specified, the node assign inference task to the DLA core                                                                                                                                                        |
-| `quantize_first_layer`                 | bool   | false         | If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8                                                                                                              |
-| `quantize_last_layer`                  | bool   | false         | If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8                                                                                                              |
-| `profile_per_layer`                    | bool   | false         | If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.                                                                 |
-| `clip_value`                           | double | 0.0           | If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration |
-| `preprocess_on_gpu`                    | bool   | true          | If true, pre-processing is performed on GPU                                                                                                                                                                                              |
-| `calibration_image_list_path`          | string | ""            | Path to a file which contains path to images. Those images will be used for int8 quantization.                                                                                                                                           |
-| `yolox_s_plus_opt_param_path`          | string | ""            | Path to parameter file                                                                                                                                                                                                                   |
-| `is_publish_color_mask`                | bool   | false         | If true, publish color mask for result visualization                                                                                                                                                                                     |
-| `is_roi_overlap_segment`               | bool   | true          | If true, overlay detected object roi onto semantic segmentation to avoid over-filtering pointcloud especially small size objects                                                                                                         |
-| `overlap_roi_score_threshold`          | float  | 0.3           | minimum existence_probability of detected roi considered to replace segmentation                                                                                                                                                         |
-| `roi_overlay_segment_label.UNKNOWN`    | bool   | true          | If true, unknown objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                            |
-| `roi_overlay_segment_label.CAR`        | bool   | false         | If true, car objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                |
-| `roi_overlay_segment_label.TRUCK`      | bool   | false         | If true, truck objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                              |
-| `roi_overlay_segment_label.BUS`        | bool   | false         | If true, bus objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                |
-| `roi_overlay_segment_label.MOTORCYCLE` | bool   | true          | If true, motorcycle objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                         |
-| `roi_overlay_segment_label.BICYCLE`    | bool   | true          | If true, bicycle objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                            |
-| `roi_overlay_segment_label.PEDESTRIAN` | bool   | true          | If true, pedestrian objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                         |
-| `roi_overlay_segment_label.ANIMAL`     | bool   | true          | If true, animal objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                             |
+| Name         | Type   | Default Value | Description                                                                              |
+| ------------ | ------ | ------------- | ---------------------------------------------------------------------------------------- |
+| `model_path` | string | ""            | The onnx file name for yolox model                                                       |
+| `model_name` | string | ""            | The yolox model name: <br /> "yolox-sPlus-T4-960x960-pseudo-finetune" for detection only, could reduce resource and processing_time <br /> "yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls" for multi-task including semantic segmentation |
+| `label_path` | string | "" | The label file with label names for detected objects written on it |
+| `precision` | string | "fp16" | The inference mode: "fp32", "fp16", "int8" |
+| `build_only` | bool | false | shutdown node after TensorRT engine file is built |
+| `calibration_algorithm` | string | "MinMax" | Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: Entropy",("Legacy" \| "Percentile"), "MinMax"] |
+| `dla_core_id` | int | -1 | If positive ID value is specified, the node assign inference task to the DLA core |
+| `quantize_first_layer` | bool | false | If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8 |
+| `quantize_last_layer` | bool | false | If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8 |
+| `profile_per_layer` | bool | false | If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose. |
+| `clip_value` | double | 0.0 | If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration |
+| `preprocess_on_gpu` | bool | true | If true, pre-processing is performed on GPU |
+| `calibration_image_list_path` | string | "" | Path to a file which contains path to images. Those images will be used for int8 quantization. |
+| `yolox_s_plus_opt_param_path` | string | "" | Path to parameter file |
+| `is_publish_color_mask` | bool | false | If true, publish color mask for result visualization |
+| `is_roi_overlap_segment` | bool | true | If true, overlay detected object roi onto semantic segmentation to avoid over-filtering pointcloud especially small size objects |
+| `overlap_roi_score_threshold` | float | 0.3 | minimum existence_probability of detected roi considered to replace segmentation |
+| `roi_overlay_segment_label.UNKNOWN` | bool | true | If true, unknown objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.CAR` | bool | false | If true, car objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.TRUCK` | bool | false | If true, truck objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.BUS` | bool | false | If true, bus objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.MOTORCYCLE` | bool | true | If true, motorcycle objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.BICYCLE` | bool | true | If true, bicycle objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.PEDESTRIAN` | bool | true | If true, pedestrian objects roi will be overlaid onto sematic segmentation mask. |
+| `roi_overlay_segment_label.ANIMAL` | bool | true | If true, animal objects roi will be overlaid onto sematic segmentation mask. |
 
 ## Assumptions / Known limits
 
diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index 9df08ef6dd7db..de088275456c3 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -4,7 +4,11 @@
   <arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
   <arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
   <arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
-  <arg name="model_name" default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"/>
+  <arg
+    name="model_name"
+    default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"
+    description="options `yolox-sPlus-T4-960x960-pseudo-finetune` if only detection is needed, `yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls` if sematic segmentation is also needed"
+  />
   <arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
   <arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
   <arg name="use_decompress" default="true" description="use image decompress"/>

From c1ffa4a13f8df1502306f2fb6411fd2484aaf8f8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 21 Jun 2024 04:51:52 +0000
Subject: [PATCH 30/39] style(pre-commit): autofix

---
 perception/tensorrt_yolox/README.md | 54 ++++++++++++++---------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
index 96041448cbd65..39d3dce04ca87 100644
--- a/perception/tensorrt_yolox/README.md
+++ b/perception/tensorrt_yolox/README.md
@@ -42,33 +42,33 @@ Zheng Ge, Songtao Liu, Feng Wang, Zeming Li, Jian Sun, "YOLOX: Exceeding YOLO Se
 
 ### Node Parameters
 
-| Name         | Type   | Default Value | Description                                                                              |
-| ------------ | ------ | ------------- | ---------------------------------------------------------------------------------------- |
-| `model_path` | string | ""            | The onnx file name for yolox model                                                       |
-| `model_name` | string | ""            | The yolox model name: <br /> "yolox-sPlus-T4-960x960-pseudo-finetune" for detection only, could reduce resource and processing_time <br /> "yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls" for multi-task including semantic segmentation |
-| `label_path` | string | "" | The label file with label names for detected objects written on it |
-| `precision` | string | "fp16" | The inference mode: "fp32", "fp16", "int8" |
-| `build_only` | bool | false | shutdown node after TensorRT engine file is built |
-| `calibration_algorithm` | string | "MinMax" | Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: Entropy",("Legacy" \| "Percentile"), "MinMax"] |
-| `dla_core_id` | int | -1 | If positive ID value is specified, the node assign inference task to the DLA core |
-| `quantize_first_layer` | bool | false | If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8 |
-| `quantize_last_layer` | bool | false | If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8 |
-| `profile_per_layer` | bool | false | If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose. |
-| `clip_value` | double | 0.0 | If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration |
-| `preprocess_on_gpu` | bool | true | If true, pre-processing is performed on GPU |
-| `calibration_image_list_path` | string | "" | Path to a file which contains path to images. Those images will be used for int8 quantization. |
-| `yolox_s_plus_opt_param_path` | string | "" | Path to parameter file |
-| `is_publish_color_mask` | bool | false | If true, publish color mask for result visualization |
-| `is_roi_overlap_segment` | bool | true | If true, overlay detected object roi onto semantic segmentation to avoid over-filtering pointcloud especially small size objects |
-| `overlap_roi_score_threshold` | float | 0.3 | minimum existence_probability of detected roi considered to replace segmentation |
-| `roi_overlay_segment_label.UNKNOWN` | bool | true | If true, unknown objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.CAR` | bool | false | If true, car objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.TRUCK` | bool | false | If true, truck objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.BUS` | bool | false | If true, bus objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.MOTORCYCLE` | bool | true | If true, motorcycle objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.BICYCLE` | bool | true | If true, bicycle objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.PEDESTRIAN` | bool | true | If true, pedestrian objects roi will be overlaid onto sematic segmentation mask. |
-| `roi_overlay_segment_label.ANIMAL` | bool | true | If true, animal objects roi will be overlaid onto sematic segmentation mask. |
+| Name                                   | Type   | Default Value | Description                                                                                                                                                                                                                                 |
+| -------------------------------------- | ------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model_path`                           | string | ""            | The onnx file name for yolox model                                                                                                                                                                                                          |
+| `model_name`                           | string | ""            | The yolox model name: <br /> "yolox-sPlus-T4-960x960-pseudo-finetune" for detection only, could reduce resource and processing_time <br /> "yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls" for multi-task including semantic segmentation |
+| `label_path`                           | string | ""            | The label file with label names for detected objects written on it                                                                                                                                                                          |
+| `precision`                            | string | "fp16"        | The inference mode: "fp32", "fp16", "int8"                                                                                                                                                                                                  |
+| `build_only`                           | bool   | false         | shutdown node after TensorRT engine file is built                                                                                                                                                                                           |
+| `calibration_algorithm`                | string | "MinMax"      | Calibration algorithm to be used for quantization when precision==int8. Valid value is one of: Entropy",("Legacy" \| "Percentile"), "MinMax"]                                                                                               |
+| `dla_core_id`                          | int    | -1            | If positive ID value is specified, the node assign inference task to the DLA core                                                                                                                                                           |
+| `quantize_first_layer`                 | bool   | false         | If true, set the operating precision for the first (input) layer to be fp16. This option is valid only when precision==int8                                                                                                                 |
+| `quantize_last_layer`                  | bool   | false         | If true, set the operating precision for the last (output) layer to be fp16. This option is valid only when precision==int8                                                                                                                 |
+| `profile_per_layer`                    | bool   | false         | If true, profiler function will be enabled. Since the profile function may affect execution speed, it is recommended to set this flag true only for development purpose.                                                                    |
+| `clip_value`                           | double | 0.0           | If positive value is specified, the value of each layer output will be clipped between [0.0, clip_value]. This option is valid only when precision==int8 and used to manually specify the dynamic range instead of using any calibration    |
+| `preprocess_on_gpu`                    | bool   | true          | If true, pre-processing is performed on GPU                                                                                                                                                                                                 |
+| `calibration_image_list_path`          | string | ""            | Path to a file which contains path to images. Those images will be used for int8 quantization.                                                                                                                                              |
+| `yolox_s_plus_opt_param_path`          | string | ""            | Path to parameter file                                                                                                                                                                                                                      |
+| `is_publish_color_mask`                | bool   | false         | If true, publish color mask for result visualization                                                                                                                                                                                        |
+| `is_roi_overlap_segment`               | bool   | true          | If true, overlay detected object roi onto semantic segmentation to avoid over-filtering pointcloud especially small size objects                                                                                                            |
+| `overlap_roi_score_threshold`          | float  | 0.3           | minimum existence_probability of detected roi considered to replace segmentation                                                                                                                                                            |
+| `roi_overlay_segment_label.UNKNOWN`    | bool   | true          | If true, unknown objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                               |
+| `roi_overlay_segment_label.CAR`        | bool   | false         | If true, car objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                   |
+| `roi_overlay_segment_label.TRUCK`      | bool   | false         | If true, truck objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                 |
+| `roi_overlay_segment_label.BUS`        | bool   | false         | If true, bus objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                   |
+| `roi_overlay_segment_label.MOTORCYCLE` | bool   | true          | If true, motorcycle objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                            |
+| `roi_overlay_segment_label.BICYCLE`    | bool   | true          | If true, bicycle objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                               |
+| `roi_overlay_segment_label.PEDESTRIAN` | bool   | true          | If true, pedestrian objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                            |
+| `roi_overlay_segment_label.ANIMAL`     | bool   | true          | If true, animal objects roi will be overlaid onto sematic segmentation mask.                                                                                                                                                                |
 
 ## Assumptions / Known limits
 

From ff05fb6d2d81a1be0d14f3064c3823b4aa53d4cf Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 21 Jun 2024 15:14:21 +0900
Subject: [PATCH 31/39] fix: launch

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
index de088275456c3..e4436a0424be5 100644
--- a/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
+++ b/perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml
@@ -14,9 +14,11 @@
   <arg name="use_decompress" default="true" description="use image decompress"/>
   <arg name="build_only" default="false" description="exit after trt engine is built"/>
 
+  <arg name="param_file" default="$(find-pkg-share image_transport_decompressor)/config/image_transport_decompressor.param.yaml"/>
   <node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
     <remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
     <remap from="~/output/raw_image" to="$(var input/image)"/>
+    <param from="$(var param_file)"/>
   </node>
 
   <node pkg="tensorrt_yolox" exec="tensorrt_yolox_node_exe" name="tensorrt_yolox" output="screen">

From 2d32fcbfc71b830cfdf858607ec89b386803878d Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Fri, 21 Jun 2024 16:32:02 +0900
Subject: [PATCH 32/39] refactor: unpublish mask for single task

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../tensorrt_yolox/src/tensorrt_yolox_node.cpp  | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index f14ccbed5dc53..0beeadeca606a 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -216,17 +216,18 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
       8, 0);
     // Refine mask: replacing segmentation mask by roi class
     // This should remove when the segmentation accuracy is high
-    if (is_roi_overlap_segment_) {
+    if (is_roi_overlap_segment_ && trt_yolox_->getMultitaskNum() > 0) {
       overlapSegmentByRoi(yolox_object, mask);
     }
   }
   // TODO(badai-nguyen): consider to change to 4bits data transfer
-  sensor_msgs::msg::Image::SharedPtr out_mask_msg =
-    cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::MONO8, mask)
-      .toImageMsg();
-  out_mask_msg->header = msg->header;
-  mask_pub_.publish(out_mask_msg);
-
+  if (trt_yolox_->getMultitaskNum() > 0) {
+    sensor_msgs::msg::Image::SharedPtr out_mask_msg =
+      cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::MONO8, mask)
+        .toImageMsg();
+    out_mask_msg->header = msg->header;
+    mask_pub_.publish(out_mask_msg);
+  }
   image_pub_.publish(in_image_ptr->toImageMsg());
   out_objects.header = msg->header;
   objects_pub_->publish(out_objects);
@@ -247,7 +248,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
       "debug/pipeline_latency_ms", pipeline_latency_ms);
   }
 
-  if (is_publish_color_mask_) {
+  if (is_publish_color_mask_ && trt_yolox_->getMultitaskNum() > 0) {
     cv::Mat color_mask =
       cv::Mat::zeros(in_image_ptr->image.rows, in_image_ptr->image.cols, CV_8UC3);
     trt_yolox_->getColorizedMask(trt_yolox_->getColorMap(), mask, color_mask);

From ff14e5265a5d4409c29cfdf2178d6dd585952d01 Mon Sep 17 00:00:00 2001
From: badai nguyen <94814556+badai-nguyen@users.noreply.github.com>
Date: Mon, 24 Jun 2024 08:18:34 +0900
Subject: [PATCH 33/39] Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp

Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com>
---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 25e441b7ecdc9..bfc7200155a8f 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -922,8 +922,7 @@ bool TrtYoloX::feedforwardAndDecode(
     objects.emplace_back(object_array);
     if (multitask_) {
       segmentation_masks_.clear();
-      float * segmentation_results =
-        segmentation_out_prob_h_.get() + (i * segmentation_out_elem_num_per_batch_);
+    
       size_t counter = 0;
       int batch =
         static_cast<int>(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);

From 9df610327c85bbedf667cc7a05f97a938698f584 Mon Sep 17 00:00:00 2001
From: badai nguyen <94814556+badai-nguyen@users.noreply.github.com>
Date: Mon, 24 Jun 2024 08:18:46 +0900
Subject: [PATCH 34/39] Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp

Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com>
---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index bfc7200155a8f..192ee432ffb47 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -943,6 +943,8 @@ bool TrtYoloX::feedforwardAndDecode(
             segmentation_out_prob_d_.get() + (i * segmentation_out_elem_num_per_batch_);
           mask = getMaskImageGpu(&(d_segmentation_results[counter]), output_dims, out_w, out_h, i);
         } else {
+          float * segmentation_results =
+              segmentation_out_prob_h_.get() + (i * segmentation_out_elem_num_per_batch_);
           mask = getMaskImage(&(segmentation_results[counter]), output_dims, out_w, out_h);
         }
         segmentation_masks_.emplace_back(std::move(mask));

From f305df26a265196835ad5938d474cf21ef43eebf Mon Sep 17 00:00:00 2001
From: badai nguyen <94814556+badai-nguyen@users.noreply.github.com>
Date: Mon, 24 Jun 2024 08:18:58 +0900
Subject: [PATCH 35/39] Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp

Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com>
---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 192ee432ffb47..9751c20e0f36b 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -905,7 +905,7 @@ bool TrtYoloX::feedforwardAndDecode(
   CHECK_CUDA_ERROR(cudaMemcpyAsync(
     out_prob_h_.get(), out_prob_d_.get(), sizeof(float) * out_elem_num_, cudaMemcpyDeviceToHost,
     *stream_));
-  if (multitask_) {
+  if (multitask_ && !use_gpu_preprocess_) {
     CHECK_CUDA_ERROR(cudaMemcpyAsync(
       segmentation_out_prob_h_.get(), segmentation_out_prob_d_.get(),
       sizeof(float) * segmentation_out_elem_num_, cudaMemcpyDeviceToHost, *stream_));

From 174010e91190e317767650a6e0d667ded38b5742 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 23 Jun 2024 23:20:31 +0000
Subject: [PATCH 36/39] style(pre-commit): autofix

---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 9751c20e0f36b..6adf1dcb88aad 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -922,7 +922,7 @@ bool TrtYoloX::feedforwardAndDecode(
     objects.emplace_back(object_array);
     if (multitask_) {
       segmentation_masks_.clear();
-    
+
       size_t counter = 0;
       int batch =
         static_cast<int>(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);

From d8006916d058d71e6a256614af5c3208d83aa8f6 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 24 Jun 2024 08:25:16 +0900
Subject: [PATCH 37/39] docs: update reamde

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 perception/tensorrt_yolox/README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/perception/tensorrt_yolox/README.md b/perception/tensorrt_yolox/README.md
index 39d3dce04ca87..af88e73cc04a6 100644
--- a/perception/tensorrt_yolox/README.md
+++ b/perception/tensorrt_yolox/README.md
@@ -194,3 +194,4 @@ with labels according to the order in this file.
 
 - <https://github.com/Megvii-BaseDetection/YOLOX>
 - <https://github.com/wep21/yolox_onnx_modifier>
+- <https://github.com/tier4/trt-yoloXP>

From 8832a3a808a9ae3cc7da6c87ad6d7ce0551da759 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 23 Jun 2024 23:27:13 +0000
Subject: [PATCH 38/39] style(pre-commit): autofix

---
 perception/tensorrt_yolox/src/tensorrt_yolox.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
index 6adf1dcb88aad..af6cecd2eff7d 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox.cpp
@@ -944,7 +944,7 @@ bool TrtYoloX::feedforwardAndDecode(
           mask = getMaskImageGpu(&(d_segmentation_results[counter]), output_dims, out_w, out_h, i);
         } else {
           float * segmentation_results =
-              segmentation_out_prob_h_.get() + (i * segmentation_out_elem_num_per_batch_);
+            segmentation_out_prob_h_.get() + (i * segmentation_out_elem_num_per_batch_);
           mask = getMaskImage(&(segmentation_results[counter]), output_dims, out_w, out_h);
         }
         segmentation_masks_.emplace_back(std::move(mask));

From a15ac049be6e0763f28f03fd407bc14af79dc524 Mon Sep 17 00:00:00 2001
From: badai-nguyen <dai.nguyen@tier4.jp>
Date: Mon, 24 Jun 2024 09:16:55 +0900
Subject: [PATCH 39/39] fix: skip mask size as yolox output

Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp>
---
 .../tensorrt_yolox/tensorrt_yolox_node.hpp    |  3 +-
 .../src/tensorrt_yolox_node.cpp               | 28 +++++++++++--------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
index a011b7b996477..48ffedb98441d 100644
--- a/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
+++ b/perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp
@@ -76,7 +76,8 @@ class TrtYoloXNode : public rclcpp::Node
   void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
   bool readLabelFile(const std::string & label_path);
   void replaceLabelMap();
-  void overlapSegmentByRoi(const tensorrt_yolox::Object & object, cv::Mat & mask);
+  void overlapSegmentByRoi(
+    const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
   int mapRoiLabel2SegLabel(const int32_t roi_label_index);
   image_transport::Publisher image_pub_;
   image_transport::Publisher mask_pub_;
diff --git a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
index 0beeadeca606a..c13384918bd9c 100644
--- a/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
+++ b/perception/tensorrt_yolox/src/tensorrt_yolox_node.cpp
@@ -190,10 +190,6 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
     return;
   }
   auto & mask = masks.at(0);
-  // TODO(badai-nguyen): change to postprocess on gpu option
-  cv::resize(
-    mask, mask, cv::Size(in_image_ptr->image.cols, in_image_ptr->image.rows), 0, 0,
-    cv::INTER_NEAREST);
 
   for (const auto & yolox_object : objects.at(0)) {
     tier4_perception_msgs::msg::DetectedObjectWithFeature object;
@@ -217,7 +213,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
     // Refine mask: replacing segmentation mask by roi class
     // This should remove when the segmentation accuracy is high
     if (is_roi_overlap_segment_ && trt_yolox_->getMultitaskNum() > 0) {
-      overlapSegmentByRoi(yolox_object, mask);
+      overlapSegmentByRoi(yolox_object, mask, width, height);
     }
   }
   // TODO(badai-nguyen): consider to change to 4bits data transfer
@@ -249,8 +245,7 @@ void TrtYoloXNode::onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg)
   }
 
   if (is_publish_color_mask_ && trt_yolox_->getMultitaskNum() > 0) {
-    cv::Mat color_mask =
-      cv::Mat::zeros(in_image_ptr->image.rows, in_image_ptr->image.cols, CV_8UC3);
+    cv::Mat color_mask = cv::Mat::zeros(mask.rows, mask.cols, CV_8UC3);
     trt_yolox_->getColorizedMask(trt_yolox_->getColorMap(), mask, color_mask);
     sensor_msgs::msg::Image::SharedPtr output_color_mask_msg =
       cv_bridge::CvImage(std_msgs::msg::Header(), sensor_msgs::image_encodings::BGR8, color_mask)
@@ -304,16 +299,25 @@ int TrtYoloXNode::mapRoiLabel2SegLabel(const int32_t roi_label_index)
   return -1;
 }
 
-void TrtYoloXNode::overlapSegmentByRoi(const tensorrt_yolox::Object & roi_object, cv::Mat & mask)
+void TrtYoloXNode::overlapSegmentByRoi(
+  const tensorrt_yolox::Object & roi_object, cv::Mat & mask, const int orig_width,
+  const int orig_height)
 {
   if (roi_object.score < overlap_roi_score_threshold_) return;
   int seg_class_index = mapRoiLabel2SegLabel(roi_object.type);
   if (seg_class_index < 0) return;
+
+  const float scale_x = static_cast<float>(mask.cols) / static_cast<float>(orig_width);
+  const float scale_y = static_cast<float>(mask.rows) / static_cast<float>(orig_height);
+  const int roi_width = static_cast<int>(roi_object.width * scale_x);
+  const int roi_height = static_cast<int>(roi_object.height * scale_y);
+  const int roi_x_offset = static_cast<int>(roi_object.x_offset * scale_x);
+  const int roi_y_offset = static_cast<int>(roi_object.y_offset * scale_y);
+
   cv::Mat replace_roi(
-    cv::Size(roi_object.width, roi_object.height), mask.type(),
-    static_cast<uint8_t>(seg_class_index));
-  replace_roi.copyTo(mask.colRange(roi_object.x_offset, roi_object.x_offset + roi_object.width)
-                       .rowRange(roi_object.y_offset, roi_object.y_offset + roi_object.height));
+    cv::Size(roi_width, roi_height), mask.type(), static_cast<uint8_t>(seg_class_index));
+  replace_roi.copyTo(mask.colRange(roi_x_offset, roi_x_offset + roi_width)
+                       .rowRange(roi_y_offset, roi_y_offset + roi_height));
 }
 
 }  // namespace tensorrt_yolox