@@ -109,7 +109,7 @@ std::vector<tensorrt_yolox::Colormap> get_seg_colormap(const std::string & filen
109
109
std::vector<tensorrt_yolox::Colormap> seg_cmap;
110
110
if (filename != " not-specified" ) {
111
111
std::vector<std::string> color_list = loadListFromTextFile (filename);
112
- for (int i = 0 ; i < ( int ) color_list.size (); i++) {
112
+ for (int i = 0 ; i < static_cast < int >( color_list.size () ); i++) {
113
113
if (i == 0 ) {
114
114
// Skip header
115
115
continue ;
@@ -120,7 +120,7 @@ std::vector<tensorrt_yolox::Colormap> get_seg_colormap(const std::string & filen
120
120
size_t npos = colormapString.find_first_of (' ,' );
121
121
assert (npos != std::string::npos);
122
122
std::string substr = colormapString.substr (0 , npos);
123
- int id = ( int ) std::stoi (trim (substr));
123
+ int id = static_cast < int >( std::stoi (trim (substr) ));
124
124
colormapString.erase (0 , npos + 1 );
125
125
126
126
npos = colormapString.find_first_of (' ,' );
@@ -157,7 +157,7 @@ namespace tensorrt_yolox
157
157
TrtYoloX::TrtYoloX (
158
158
const std::string & model_path, const std::string & precision, const std::string & color_map_path,
159
159
const int num_class, const float score_threshold, const float nms_threshold,
160
- tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess,
160
+ tensorrt_common::BuildConfig build_config, const bool use_gpu_preprocess, bool publish_color_mask,
161
161
std::string calibration_image_list_path, const double norm_factor,
162
162
[[maybe_unused]] const std::string & cache_dir, const tensorrt_common::BatchConfig & batch_config,
163
163
const size_t max_workspace_size)
@@ -167,7 +167,8 @@ TrtYoloX::TrtYoloX(
167
167
norm_factor_ = norm_factor;
168
168
batch_size_ = batch_config[2 ];
169
169
multitask_ = 0 ;
170
- color_map_ = get_seg_colormap (color_map_path);
170
+ sematic_color_map_ = get_seg_colormap (color_map_path);
171
+ publish_color_mask_ = publish_color_mask;
171
172
if (precision == " int8" ) {
172
173
if (build_config.clip_value <= 0.0 ) {
173
174
if (calibration_image_list_path.empty ()) {
@@ -388,13 +389,14 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
388
389
for (int m = 0 ; m < multitask_; m++) {
389
390
const auto output_dims =
390
391
trt_common_->getBindingDimensions (m + 2 ); // 0 : input, 1 : output for detections
391
- const float scale =
392
- std::min (output_dims.d [3 ] / float (width), output_dims.d [2 ] / float (height));
393
- int out_w = (int )(width * scale);
394
- int out_h = (int )(height * scale);
395
- // size_t out_elem_num = std::accumulate(
392
+ const float scale = std::min (
393
+ output_dims.d [3 ] / static_cast <float >(width),
394
+ output_dims.d [2 ] / static_cast <float >(height));
395
+ int out_w = static_cast <int >(width * scale);
396
+ int out_h = static_cast <int >(height * scale);
397
+ // size_t out_elem_num = std::accumulate(
396
398
// output_dims.d + 1, output_dims.d + output_dims.nbDims, 1, std::multiplies<int>());
397
- // out_elem_num = out_elem_num * batch_size_;
399
+ // out_elem_num = out_elem_num * batch_size_;
398
400
size_t out_elem_num = out_w * out_h * batch_size_;
399
401
argmax_out_elem_num += out_elem_num;
400
402
}
@@ -468,8 +470,9 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
468
470
for (int m = 0 ; m < multitask_; m++) {
469
471
const auto output_dims =
470
472
trt_common_->getBindingDimensions (m + 2 ); // 0: input, 1: output for detections
471
- const float scale =
472
- std::min (output_dims.d [3 ] / float (image.cols ), output_dims.d [2 ] / float (image.rows ));
473
+ const float scale = std::min (
474
+ output_dims.d [3 ] / static_cast <float >(image.cols ),
475
+ output_dims.d [2 ] / static_cast <float >(image.rows ));
473
476
int out_w = static_cast <int >(image.cols * scale);
474
477
int out_h = static_cast <int >(image.rows * scale);
475
478
argmax_out_elem_num += out_w * out_h * batch_size;
@@ -545,8 +548,8 @@ void TrtYoloX::preprocess(const std::vector<cv::Mat> & images)
545
548
}
546
549
547
550
bool TrtYoloX::doInference (
548
- const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & mask ,
549
- [[maybe_unused]] cv::Mat & color_mask )
551
+ const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector< cv::Mat> & masks ,
552
+ [[maybe_unused]] std::vector< cv::Mat> & color_masks )
550
553
{
551
554
if (!trt_common_->isInitialized ()) {
552
555
return false ;
@@ -559,7 +562,7 @@ bool TrtYoloX::doInference(
559
562
}
560
563
561
564
if (needs_output_decode_) {
562
- return feedforwardAndDecode (images, objects, mask, color_mask );
565
+ return feedforwardAndDecode (images, objects, masks, color_masks );
563
566
} else {
564
567
return feedforward (images, objects);
565
568
}
@@ -799,8 +802,8 @@ void TrtYoloX::multiScalePreprocess(const cv::Mat & image, const std::vector<cv:
799
802
bool TrtYoloX::doInferenceWithRoi (
800
803
const std::vector<cv::Mat> & images, ObjectArrays & objects, const std::vector<cv::Rect > & rois)
801
804
{
802
- cv::Mat mask ;
803
- cv::Mat color_mask ;
805
+ std::vector< cv::Mat> masks ;
806
+ std::vector< cv::Mat> color_masks ;
804
807
if (!trt_common_->isInitialized ()) {
805
808
return false ;
806
809
}
@@ -811,7 +814,7 @@ bool TrtYoloX::doInferenceWithRoi(
811
814
}
812
815
813
816
if (needs_output_decode_) {
814
- return feedforwardAndDecode (images, objects, mask, color_mask );
817
+ return feedforwardAndDecode (images, objects, masks, color_masks );
815
818
} else {
816
819
return feedforward (images, objects);
817
820
}
@@ -890,8 +893,8 @@ bool TrtYoloX::feedforward(const std::vector<cv::Mat> & images, ObjectArrays & o
890
893
}
891
894
892
895
bool TrtYoloX::feedforwardAndDecode (
893
- const std::vector<cv::Mat> & images, ObjectArrays & objects, cv::Mat & out_mask ,
894
- [[maybe_unused]] cv::Mat & color_mask )
896
+ const std::vector<cv::Mat> & images, ObjectArrays & objects, std::vector< cv::Mat> & out_masks ,
897
+ [[maybe_unused]] std::vector< cv::Mat> & color_masks )
895
898
{
896
899
std::vector<void *> buffers = {input_d_.get (), out_prob_d_.get ()};
897
900
if (multitask_) {
@@ -914,26 +917,31 @@ bool TrtYoloX::feedforwardAndDecode(
914
917
915
918
for (size_t i = 0 ; i < batch_size; ++i) {
916
919
auto image_size = images[i].size ();
920
+ auto & out_mask = out_masks[i];
921
+ auto & color_mask = color_masks[i];
917
922
float * batch_prob = out_prob_h_.get () + (i * out_elem_num_per_batch_);
918
923
ObjectArray object_array;
919
924
decodeOutputs (batch_prob, object_array, scales_[i], image_size);
925
+ // add refine mask using object
920
926
objects.emplace_back (object_array);
921
927
if (multitask_) {
922
928
segmentation_masks_.clear ();
923
929
float * segmentation_results =
924
930
segmentation_out_prob_h_.get () + (i * segmentation_out_elem_num_per_batch_);
925
931
size_t counter = 0 ;
926
- int batch = (int )(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);
932
+ int batch =
933
+ static_cast <int >(segmentation_out_elem_num_ / segmentation_out_elem_num_per_batch_);
927
934
for (int m = 0 ; m < multitask_; m++) {
928
935
const auto output_dims =
929
936
trt_common_->getBindingDimensions (m + 2 ); // 0 : input, 1 : output for detections
930
937
size_t out_elem_num = std::accumulate (
931
938
output_dims.d + 1 , output_dims.d + output_dims.nbDims , 1 , std::multiplies<int >());
932
939
out_elem_num = out_elem_num * batch;
933
940
const float scale = std::min (
934
- output_dims.d [3 ] / float (image_size.width ), output_dims.d [2 ] / float (image_size.height ));
935
- int out_w = (int )(image_size.width * scale);
936
- int out_h = (int )(image_size.height * scale);
941
+ output_dims.d [3 ] / static_cast <float >(image_size.width ),
942
+ output_dims.d [2 ] / static_cast <float >(image_size.height ));
943
+ int out_w = static_cast <int >(image_size.width * scale);
944
+ int out_h = static_cast <int >(image_size.height * scale);
937
945
cv::Mat mask;
938
946
if (use_gpu_preprocess_) {
939
947
float * d_segmentation_results =
@@ -945,8 +953,16 @@ bool TrtYoloX::feedforwardAndDecode(
945
953
segmentation_masks_.push_back (mask);
946
954
counter += out_elem_num;
947
955
}
948
- out_mask = segmentation_masks_.at (0 );
949
- color_mask = getColorizedMask (0 , color_map_);
956
+ } else {
957
+ continue ;
958
+ }
959
+ // Assume semantic segmentation is the first task
960
+ // This should be removed when the segmentation accuracy is high
961
+ out_mask = segmentation_masks_.at (0 );
962
+
963
+ // publish color mask for visualization
964
+ if (publish_color_mask_) {
965
+ color_mask = getColorizedMask (0 , sematic_color_map_);
950
966
}
951
967
}
952
968
return true ;
0 commit comments