Skip to content

Commit dfcd130

Browse files
badai-nguyenmanatopre-commit-ci[bot]
committed
feat(tensorrt yolox): inference and publish mask image from yolox model with semantic segmentation header (autowarefoundation#5553)
* add segmentation model Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> fix: add multitask for segment Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: publish mask Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * feat: publish colorized mask Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: resize yolox mask Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: add memory allocate operations Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp> * refactor: remove underscore for a local variable Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp> * chore: add condition to check the number of subscriber for newly added topics Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp> * chore: pre-commit Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: add roi overlapping segment Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: roi overlay segment Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: refactor Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * docs: update readme Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: update model name Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: add utils into tensorrt_yolox Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: launch file Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: remove unnecessary depend Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: fix yaml file Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: remove duplicated param in launch Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: semantic class Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * docs: update readme Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: update default param Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: add processing time topic Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: typo Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * style(pre-commit): autofix * chore: fix cspell error Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * fix: yolox default param Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: rename debug topics Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * chore: rename debug topics Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * docs: update model description Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * style(pre-commit): autofix * fix: launch Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * refactor: unpublish mask for single task Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com> * Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com> * Update perception/tensorrt_yolox/src/tensorrt_yolox.cpp Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com> * style(pre-commit): autofix * docs: update reamde Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> * style(pre-commit): autofix * fix: skip mask size as yolox output Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> --------- Signed-off-by: badai-nguyen <dai.nguyen@tier4.jp> Signed-off-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp> Co-authored-by: Manato HIRABAYASHI <manato.hirabayashi@tier4.jp> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Manato Hirabayashi <3022416+manato@users.noreply.github.com>
1 parent 716c19a commit dfcd130

File tree

11 files changed

+613
-42
lines changed

11 files changed

+613
-42
lines changed

common/tensorrt_common/include/tensorrt_common/tensorrt_common.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@
1515
#ifndef TENSORRT_COMMON__TENSORRT_COMMON_HPP_
1616
#define TENSORRT_COMMON__TENSORRT_COMMON_HPP_
1717

18+
#ifndef YOLOX_STANDALONE
1819
#include <rclcpp/rclcpp.hpp>
20+
#endif
1921

2022
#include <NvInfer.h>
2123
#include <NvOnnxParser.h>
@@ -86,6 +88,7 @@ struct BuildConfig
8688
profile_per_layer(profile_per_layer),
8789
clip_value(clip_value)
8890
{
91+
#ifndef YOLOX_STANDALONE
8992
if (
9093
std::find(valid_calib_type.begin(), valid_calib_type.end(), calib_type_str) ==
9194
valid_calib_type.end()) {
@@ -95,6 +98,7 @@ struct BuildConfig
9598
<< "Default calibration type will be used: MinMax" << std::endl;
9699
std::cerr << message.str();
97100
}
101+
#endif
98102
}
99103
};
100104

perception/tensorrt_yolox/README.md

+61-23
Large diffs are not rendered by default.

perception/tensorrt_yolox/config/yolox_s_plus_opt.param.yaml

+22
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,29 @@
1+
# cspell: ignore semseg
12
/**:
23
ros__parameters:
4+
# refine segmentation mask by overlay roi class
5+
# disable when sematic segmentation accuracy is good enough
6+
is_roi_overlap_segment: true
7+
8+
# minimum existence_probability of detected roi considered to replace segmentation
9+
overlap_roi_score_threshold: 0.3
10+
11+
# publish color mask for result visualization
12+
is_publish_color_mask: false
13+
14+
roi_overlay_segment_label:
15+
UNKNOWN : true
16+
CAR : false
17+
TRUCK : false
18+
BUS : false
19+
MOTORCYCLE : true
20+
BICYCLE : true
21+
PEDESTRIAN : true
22+
ANIMAL: true
23+
324
model_path: "$(var data_path)/tensorrt_yolox/$(var model_name).onnx"
425
label_path: "$(var data_path)/tensorrt_yolox/label.txt"
26+
color_map_path: "$(var data_path)/tensorrt_yolox/semseg_color_map.csv"
527
score_threshold: 0.35
628
nms_threshold: 0.7
729
precision: "int8" # Operation precision to be used on inference. Valid value is one of: [fp32, fp16, int8].

perception/tensorrt_yolox/include/tensorrt_yolox/preprocess.hpp

+16-1
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,21 @@ extern void crop_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
179179
extern void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
180180
float * dst, unsigned char * src, int d_w, int d_h, int d_c, Roi * d_roi, int s_w, int s_h,
181181
int s_c, int batch, float norm, cudaStream_t stream);
182-
} // namespace tensorrt_yolox
183182

183+
/**
184+
* @brief Argmax on GPU
185+
* @param[out] dst processed image
186+
* @param[in] src probability map
187+
* @param[in] d_w width for output
188+
* @param[in] d_h height for output
189+
* @param[in] s_w width for input
190+
* @param[in] s_h height for input
191+
* @param[in] s_c channel for input
192+
* @param[in] batch batch size
193+
* @param[in] stream cuda stream
194+
*/
195+
extern void argmax_gpu(
196+
unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch,
197+
cudaStream_t stream);
198+
} // namespace tensorrt_yolox
184199
#endif // TENSORRT_YOLOX__PREPROCESS_HPP_

perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox.hpp

+64-3
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,13 @@ struct GridAndStride
5252
int stride;
5353
};
5454

55+
typedef struct Colormap_
56+
{
57+
int id;
58+
std::string name;
59+
std::vector<unsigned char> color;
60+
} Colormap;
61+
5562
/**
5663
* @class TrtYoloX
5764
* @brief TensorRT YOLOX for faster inference
@@ -85,7 +92,7 @@ class TrtYoloX
8592
const bool use_gpu_preprocess = false, std::string calibration_image_list_file = std::string(),
8693
const double norm_factor = 1.0, [[maybe_unused]] const std::string & cache_dir = "",
8794
const tensorrt_common::BatchConfig & batch_config = {1, 1, 1},
88-
const size_t max_workspace_size = (1 << 30));
95+
const size_t max_workspace_size = (1 << 30), const std::string & color_map_path = "");
8996
/**
9097
* @brief Deconstruct TrtYoloX
9198
*/
@@ -96,7 +103,9 @@ class TrtYoloX
96103
* @param[out] objects results for object detection
97104
* @param[in] images batched images
98105
*/
99-
bool doInference(const std::vector<cv::Mat> & images, ObjectArrays & objects);
106+
bool doInference(
107+
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
108+
std::vector<cv::Mat> & color_masks);
100109

101110
/**
102111
* @brief run inference including pre-process and post-process
@@ -130,6 +139,22 @@ class TrtYoloX
130139
*/
131140
void printProfiling(void);
132141

142+
/**
143+
* @brief get num for multitask heads
144+
*/
145+
int getMultitaskNum(void);
146+
147+
/**
148+
* @brief get colorized masks from index using specific colormap
149+
* @param[out] cmask colorized mask
150+
* @param[in] index multitask index
151+
* @param[in] colormap colormap for masks
152+
*/
153+
void getColorizedMask(
154+
const std::vector<tensorrt_yolox::Colormap> & colormap, const cv::Mat & mask,
155+
cv::Mat & colorized_mask);
156+
inline std::vector<Colormap> getColorMap() { return sematic_color_map_; }
157+
133158
private:
134159
/**
135160
* @brief run preprocess including resizing, letterbox, NHWC2NCHW and toFloat on CPU
@@ -177,7 +202,9 @@ class TrtYoloX
177202
const cv::Mat & images, int batch_size, ObjectArrays & objects);
178203

179204
bool feedforward(const std::vector<cv::Mat> & images, ObjectArrays & objects);
180-
bool feedforwardAndDecode(const std::vector<cv::Mat> & images, ObjectArrays & objects);
205+
bool feedforwardAndDecode(
206+
const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector<cv::Mat> & masks,
207+
std::vector<cv::Mat> & color_masks);
181208
void decodeOutputs(float * prob, ObjectArray & objects, float scale, cv::Size & img_size) const;
182209
void generateGridsAndStride(
183210
const int target_w, const int target_h, const std::vector<int> & strides,
@@ -206,6 +233,26 @@ class TrtYoloX
206233
void nmsSortedBboxes(
207234
const ObjectArray & face_objects, std::vector<int> & picked, float nms_threshold) const;
208235

236+
/**
237+
* @brief get a mask image for a segmentation head
238+
* @param[out] argmax argmax results
239+
* @param[in] prob probability map
240+
* @param[in] dims dimension for probability map
241+
* @param[in] out_w mask width excluding letterbox
242+
* @param[in] out_h mask height excluding letterbox
243+
*/
244+
cv::Mat getMaskImage(float * prob, nvinfer1::Dims dims, int out_w, int out_h);
245+
246+
/**
247+
* @brief get a mask image on GPUs for a segmentation head
248+
* @param[out] mask image
249+
* @param[in] prob probability map on device
250+
* @param[in] out_w mask width excluding letterbox
251+
* @param[in] out_h mask height excluding letterbox
252+
* @param[in] b current batch
253+
*/
254+
cv::Mat getMaskImageGpu(float * d_prob, nvinfer1::Dims dims, int out_w, int out_h, int b);
255+
209256
std::unique_ptr<tensorrt_common::TrtCommon> trt_common_;
210257

211258
std::vector<float> input_h_;
@@ -249,6 +296,20 @@ class TrtYoloX
249296
CudaUniquePtrHost<Roi[]> roi_h_;
250297
// device pointer for ROI
251298
CudaUniquePtr<Roi[]> roi_d_;
299+
300+
// flag whether model has multitasks
301+
int multitask_;
302+
// buff size for segmentation heads
303+
CudaUniquePtr<float[]> segmentation_out_prob_d_;
304+
CudaUniquePtrHost<float[]> segmentation_out_prob_h_;
305+
size_t segmentation_out_elem_num_;
306+
size_t segmentation_out_elem_num_per_batch_;
307+
std::vector<cv::Mat> segmentation_masks_;
308+
// host buffer for argmax postprocessing on GPU
309+
CudaUniquePtrHost<unsigned char[]> argmax_buf_h_;
310+
// device buffer for argmax postprocessing on GPU
311+
CudaUniquePtr<unsigned char[]> argmax_buf_d_;
312+
std::vector<tensorrt_yolox::Colormap> sematic_color_map_;
252313
};
253314

254315
} // namespace tensorrt_yolox

perception/tensorrt_yolox/include/tensorrt_yolox/tensorrt_yolox_node.hpp

+45-2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
#ifndef TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
1616
#define TENSORRT_YOLOX__TENSORRT_YOLOX_NODE_HPP_
1717

18+
#include "object_recognition_utils/object_recognition_utils.hpp"
19+
1820
#include <image_transport/image_transport.hpp>
1921
#include <opencv2/opencv.hpp>
2022
#include <rclcpp/rclcpp.hpp>
@@ -25,6 +27,7 @@
2527
#include <sensor_msgs/msg/image.hpp>
2628
#include <std_msgs/msg/header.hpp>
2729
#include <tier4_perception_msgs/msg/detected_objects_with_feature.hpp>
30+
#include <tier4_perception_msgs/msg/semantic.hpp>
2831

2932
#if __has_include(<cv_bridge/cv_bridge.hpp>)
3033
#include <cv_bridge/cv_bridge.hpp>
@@ -41,10 +44,30 @@
4144

4245
namespace tensorrt_yolox
4346
{
47+
// cspell: ignore Semseg
4448
using LabelMap = std::map<int, std::string>;
45-
49+
using Label = tier4_perception_msgs::msg::Semantic;
4650
class TrtYoloXNode : public rclcpp::Node
4751
{
52+
struct RoiOverlaySemsegLabel
53+
{
54+
bool UNKNOWN;
55+
bool CAR;
56+
bool TRUCK;
57+
bool BUS;
58+
bool MOTORCYCLE;
59+
bool BICYCLE;
60+
bool PEDESTRIAN;
61+
bool ANIMAL;
62+
bool isOverlay(const uint8_t label) const
63+
{
64+
return (label == Label::UNKNOWN && UNKNOWN) || (label == Label::CAR && CAR) ||
65+
(label == Label::TRUCK && TRUCK) || (label == Label::BUS && BUS) ||
66+
(label == Label::ANIMAL && ANIMAL) || (label == Label::MOTORBIKE && MOTORCYCLE) ||
67+
(label == Label::BICYCLE && BICYCLE) || (label == Label::PEDESTRIAN && PEDESTRIAN);
68+
};
69+
}; // struct RoiOverlaySemsegLabel
70+
4871
public:
4972
explicit TrtYoloXNode(const rclcpp::NodeOptions & node_options);
5073

@@ -53,8 +76,13 @@ class TrtYoloXNode : public rclcpp::Node
5376
void onImage(const sensor_msgs::msg::Image::ConstSharedPtr msg);
5477
bool readLabelFile(const std::string & label_path);
5578
void replaceLabelMap();
56-
79+
void overlapSegmentByRoi(
80+
const tensorrt_yolox::Object & object, cv::Mat & mask, const int width, const int height);
81+
int mapRoiLabel2SegLabel(const int32_t roi_label_index);
5782
image_transport::Publisher image_pub_;
83+
image_transport::Publisher mask_pub_;
84+
image_transport::Publisher color_mask_pub_;
85+
5886
rclcpp::Publisher<tier4_perception_msgs::msg::DetectedObjectsWithFeature>::SharedPtr objects_pub_;
5987

6088
image_transport::Subscriber image_sub_;
@@ -63,6 +91,21 @@ class TrtYoloXNode : public rclcpp::Node
6391

6492
LabelMap label_map_;
6593
std::unique_ptr<tensorrt_yolox::TrtYoloX> trt_yolox_;
94+
bool is_roi_overlap_segment_;
95+
bool is_publish_color_mask_;
96+
float overlap_roi_score_threshold_;
97+
// TODO(badai-nguyen): change to function
98+
std::map<std::string, int> remap_roi_to_semantic_ = {
99+
{"UNKNOWN", 3}, // other
100+
{"ANIMAL", 0}, // other
101+
{"PEDESTRIAN", 6}, // person
102+
{"CAR", 7}, // car
103+
{"TRUCK", 7}, // truck
104+
{"BUS", 7}, // bus
105+
{"BICYCLE", 8}, // bicycle
106+
{"MOTORBIKE", 8}, // motorcycle
107+
};
108+
RoiOverlaySemsegLabel roi_overlay_segment_labels_;
66109
std::unique_ptr<tier4_autoware_utils::StopWatch<std::chrono::milliseconds>> stop_watch_ptr_;
67110
std::unique_ptr<tier4_autoware_utils::DebugPublisher> debug_publisher_;
68111
};

perception/tensorrt_yolox/launch/yolox_s_plus_opt.launch.xml

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,30 @@
11
<?xml version="1.0"?>
22
<launch>
3-
<!-- cspell:ignore finetune -->
3+
<!-- cspell: ignore semseg, finetune -->
44
<arg name="input/image" default="/sensing/camera/camera0/image_rect_color"/>
55
<arg name="output/objects" default="/perception/object_recognition/detection/rois0"/>
6-
<arg name="model_name" default="yolox-sPlus-T4-960x960-pseudo-finetune"/>
6+
<arg name="output/mask" default="/perception/object_recognition/detection/mask0"/>
7+
<arg
8+
name="model_name"
9+
default="yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls"
10+
description="options `yolox-sPlus-T4-960x960-pseudo-finetune` if only detection is needed, `yolox-sPlus-opt-pseudoV2-T4-960x960-T4-seg16cls` if sematic segmentation is also needed"
11+
/>
712
<arg name="data_path" default="$(env HOME)/autoware_data" description="packages data and artifacts directory path"/>
813
<arg name="yolox_param_path" default="$(find-pkg-share tensorrt_yolox)/config/yolox_s_plus_opt.param.yaml"/>
914
<arg name="use_decompress" default="true" description="use image decompress"/>
1015
<arg name="build_only" default="false" description="exit after trt engine is built"/>
1116

17+
<arg name="param_file" default="$(find-pkg-share image_transport_decompressor)/config/image_transport_decompressor.param.yaml"/>
1218
<node pkg="image_transport_decompressor" exec="image_transport_decompressor_node" name="image_transport_decompressor_node" if="$(var use_decompress)">
1319
<remap from="~/input/compressed_image" to="$(var input/image)/compressed"/>
1420
<remap from="~/output/raw_image" to="$(var input/image)"/>
21+
<param from="$(var param_file)"/>
1522
</node>
1623

1724
<node pkg="tensorrt_yolox" exec="tensorrt_yolox_node_exe" name="tensorrt_yolox" output="screen">
1825
<remap from="~/in/image" to="$(var input/image)"/>
1926
<remap from="~/out/objects" to="$(var output/objects)"/>
27+
<remap from="~/out/mask" to="$(var output/mask)"/>
2028
<param from="$(var yolox_param_path)" allow_substs="true"/>
2129
<param name="build_only" value="$(var build_only)"/>
2230
</node>

perception/tensorrt_yolox/src/preprocess.cu

+35
Original file line numberDiff line numberDiff line change
@@ -594,4 +594,39 @@ void multi_scale_resize_bilinear_letterbox_nhwc_to_nchw32_batch_gpu(
594594
cuda_gridsize(N), block, 0, stream>>>(N, dst, src, d_h, d_w, s_h, s_w, d_roi, norm, batch);
595595
}
596596
597+
__global__ void argmax_gpu_kernel(
598+
int N, unsigned char * dst, float * src, int dst_h, int dst_w, int src_c, int src_h, int src_w,
599+
int batch)
600+
{
601+
// NHWC
602+
int index = (blockIdx.x + blockIdx.y * gridDim.x) * blockDim.x + threadIdx.x;
603+
604+
if (index >= N) return;
605+
int c = 0;
606+
int w = index % dst_w;
607+
int h = index / (dst_w);
608+
609+
int b;
610+
for (b = 0; b < batch; b++) {
611+
float max_prob = 0.0;
612+
int max_index = 0;
613+
int dst_index = w + dst_w * h + b * dst_h * dst_w;
614+
for (c = 0; c < src_c; c++) {
615+
int src_index = w + src_w * h + c * src_h * src_w + b * src_c * src_h * src_w;
616+
max_index = max_prob < src[src_index] ? c : max_index;
617+
max_prob = max_prob < src[src_index] ? src[src_index] : max_prob;
618+
}
619+
dst[dst_index] = max_index;
620+
}
621+
}
622+
623+
void argmax_gpu(
624+
unsigned char * dst, float * src, int d_w, int d_h, int s_w, int s_h, int s_c, int batch,
625+
cudaStream_t stream)
626+
{
627+
int N = d_w * d_h;
628+
argmax_gpu_kernel<<<cuda_gridsize(N), block, 0, stream>>>(
629+
N, dst, src, d_h, d_w, s_c, s_h, s_w, batch);
630+
}
631+
597632
} // namespace tensorrt_yolox

0 commit comments

Comments
 (0)