@@ -283,7 +283,8 @@ TrtYoloX::TrtYoloX(
283
283
out_elem_num_ = out_elem_num_ * batch_config[2 ];
284
284
out_elem_num_per_batch_ = static_cast <int >(out_elem_num_ / batch_config[2 ]);
285
285
out_prob_d_ = autoware::cuda_utils::make_unique<float []>(out_elem_num_);
286
- out_prob_h_ = autoware::cuda_utils::make_unique_host<float []>(out_elem_num_, cudaHostAllocPortable);
286
+ out_prob_h_ =
287
+ autoware::cuda_utils::make_unique_host<float []>(out_elem_num_, cudaHostAllocPortable);
287
288
int w = input_dims.d [3 ];
288
289
int h = input_dims.d [2 ];
289
290
int sum_tensors = (w / 8 ) * (h / 8 ) + (w / 16 ) * (h / 16 ) + (w / 32 ) * (h / 32 );
@@ -300,9 +301,11 @@ TrtYoloX::TrtYoloX(
300
301
max_detections_ = out_scores_dims.d [1 ];
301
302
input_d_ = autoware::cuda_utils::make_unique<float []>(batch_config[2 ] * input_size);
302
303
out_num_detections_d_ = autoware::cuda_utils::make_unique<int32_t []>(batch_config[2 ]);
303
- out_boxes_d_ = autoware::cuda_utils::make_unique<float []>(batch_config[2 ] * max_detections_ * 4 );
304
+ out_boxes_d_ =
305
+ autoware::cuda_utils::make_unique<float []>(batch_config[2 ] * max_detections_ * 4 );
304
306
out_scores_d_ = autoware::cuda_utils::make_unique<float []>(batch_config[2 ] * max_detections_);
305
- out_classes_d_ = autoware::cuda_utils::make_unique<int32_t []>(batch_config[2 ] * max_detections_);
307
+ out_classes_d_ =
308
+ autoware::cuda_utils::make_unique<int32_t []>(batch_config[2 ] * max_detections_);
306
309
}
307
310
if (multitask_) {
308
311
// Allocate buffer for segmentation
@@ -317,9 +320,10 @@ TrtYoloX::TrtYoloX(
317
320
}
318
321
segmentation_out_elem_num_per_batch_ =
319
322
static_cast <int >(segmentation_out_elem_num_ / batch_config[2 ]);
320
- segmentation_out_prob_d_ = autoware::cuda_utils::make_unique<float []>(segmentation_out_elem_num_);
321
- segmentation_out_prob_h_ =
322
- autoware::cuda_utils::make_unique_host<float []>(segmentation_out_elem_num_, cudaHostAllocPortable);
323
+ segmentation_out_prob_d_ =
324
+ autoware::cuda_utils::make_unique<float []>(segmentation_out_elem_num_);
325
+ segmentation_out_prob_h_ = autoware::cuda_utils::make_unique_host<float []>(
326
+ segmentation_out_elem_num_, cudaHostAllocPortable);
323
327
}
324
328
if (use_gpu_preprocess) {
325
329
use_gpu_preprocess_ = true ;
@@ -396,7 +400,8 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
396
400
}
397
401
image_buf_h_ = autoware::cuda_utils::make_unique_host<unsigned char []>(
398
402
width * height * 3 * batch_size_, cudaHostAllocWriteCombined);
399
- image_buf_d_ = autoware::cuda_utils::make_unique<unsigned char []>(width * height * 3 * batch_size_);
403
+ image_buf_d_ =
404
+ autoware::cuda_utils::make_unique<unsigned char []>(width * height * 3 * batch_size_);
400
405
}
401
406
if (multitask_) {
402
407
size_t argmax_out_elem_num = 0 ;
@@ -414,8 +419,8 @@ void TrtYoloX::initPreprocessBuffer(int width, int height)
414
419
size_t out_elem_num = out_w * out_h * batch_size_;
415
420
argmax_out_elem_num += out_elem_num;
416
421
}
417
- argmax_buf_h_ =
418
- autoware::cuda_utils::make_unique_host< unsigned char []>( argmax_out_elem_num, cudaHostAllocPortable);
422
+ argmax_buf_h_ = autoware::cuda_utils::make_unique_host< unsigned char []>(
423
+ argmax_out_elem_num, cudaHostAllocPortable);
419
424
argmax_buf_d_ = autoware::cuda_utils::make_unique<unsigned char []>(argmax_out_elem_num);
420
425
}
421
426
}
@@ -470,8 +475,8 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
470
475
scales_.emplace_back (scale);
471
476
image_buf_h_ = autoware::cuda_utils::make_unique_host<unsigned char []>(
472
477
image.cols * image.rows * 3 * batch_size, cudaHostAllocWriteCombined);
473
- image_buf_d_ =
474
- autoware::cuda_utils::make_unique< unsigned char []>( image.cols * image.rows * 3 * batch_size);
478
+ image_buf_d_ = autoware::cuda_utils::make_unique< unsigned char []>(
479
+ image.cols * image.rows * 3 * batch_size);
475
480
}
476
481
int index = b * image.cols * image.rows * 3 ;
477
482
// Copy into pinned memory
@@ -496,8 +501,8 @@ void TrtYoloX::preprocessGpu(const std::vector<cv::Mat> & images)
496
501
497
502
if (multitask_) {
498
503
if (!argmax_buf_h_) {
499
- argmax_buf_h_ =
500
- autoware::cuda_utils::make_unique_host< unsigned char []>( argmax_out_elem_num, cudaHostAllocPortable);
504
+ argmax_buf_h_ = autoware::cuda_utils::make_unique_host< unsigned char []>(
505
+ argmax_out_elem_num, cudaHostAllocPortable);
501
506
}
502
507
if (!argmax_buf_d_) {
503
508
argmax_buf_d_ = autoware::cuda_utils::make_unique<unsigned char []>(argmax_out_elem_num);
@@ -618,8 +623,8 @@ void TrtYoloX::preprocessWithRoiGpu(
618
623
if (!image_buf_h_) {
619
624
image_buf_h_ = autoware::cuda_utils::make_unique_host<unsigned char []>(
620
625
image.cols * image.rows * 3 * batch_size, cudaHostAllocWriteCombined);
621
- image_buf_d_ =
622
- autoware::cuda_utils::make_unique< unsigned char []>( image.cols * image.rows * 3 * batch_size);
626
+ image_buf_d_ = autoware::cuda_utils::make_unique< unsigned char []>(
627
+ image.cols * image.rows * 3 * batch_size);
623
628
}
624
629
int index = b * image.cols * image.rows * 3 ;
625
630
// Copy into pinned memory
@@ -735,7 +740,8 @@ void TrtYoloX::multiScalePreprocessGpu(const cv::Mat & image, const std::vector<
735
740
if (!image_buf_h_) {
736
741
image_buf_h_ = autoware::cuda_utils::make_unique_host<unsigned char []>(
737
742
image.cols * image.rows * 3 * 1 , cudaHostAllocWriteCombined);
738
- image_buf_d_ = autoware::cuda_utils::make_unique<unsigned char []>(image.cols * image.rows * 3 * 1 );
743
+ image_buf_d_ =
744
+ autoware::cuda_utils::make_unique<unsigned char []>(image.cols * image.rows * 3 * 1 );
739
745
}
740
746
int index = 0 * image.cols * image.rows * 3 ;
741
747
// Copy into pinned memory
0 commit comments