@@ -53,19 +53,29 @@ void colArgMax(const cv::Mat& src,
53
53
DetectedKeypoints decode_simcc (const cv::Mat& simcc_x,
54
54
const cv::Mat& simcc_y,
55
55
const cv::Point2f& extra_scale = cv::Point2f(1 .f, 1 .f),
56
+ const cv::Point2i& extra_offset = cv::Point2f(0 .f, 0 .f),
56
57
bool apply_softmax = false,
57
- float simcc_split_ratio = 2.0f) {
58
+ float simcc_split_ratio = 2.0f,
59
+ float decode_beta = 150.0f,
60
+ float sigma = 6.0f) {
58
61
cv::Mat x_locs, max_val_x;
59
- colArgMax (simcc_x, x_locs, max_val_x, apply_softmax );
62
+ colArgMax (simcc_x, x_locs, max_val_x, false );
60
63
61
64
cv::Mat y_locs, max_val_y;
62
- colArgMax (simcc_y, y_locs, max_val_y, apply_softmax);
65
+ colArgMax (simcc_y, y_locs, max_val_y, false );
66
+
67
+ if (apply_softmax) {
68
+ cv::Mat tmp_locs;
69
+ colArgMax (decode_beta * sigma * simcc_x, tmp_locs, max_val_x, true );
70
+ colArgMax (decode_beta * sigma * simcc_y, tmp_locs, max_val_y, true );
71
+ }
63
72
64
73
std::vector<cv::Point2f> keypoints (x_locs.rows );
65
74
cv::Mat scores = cv::Mat::zeros (x_locs.rows , 1 , CV_32F);
66
- for (int i = 0 ; i < x_locs.rows ; i++) {
67
- keypoints[i] =
68
- cv::Point2f (x_locs.at <int >(i) * extra_scale.x , y_locs.at <int >(i) * extra_scale.y ) / simcc_split_ratio;
75
+ for (int i = 0 ; i < x_locs.rows ; ++i) {
76
+ keypoints[i] = cv::Point2f ((x_locs.at <int >(i) - extra_offset.x ) * extra_scale.x ,
77
+ (y_locs.at <int >(i) - extra_offset.y ) * extra_scale.y ) /
78
+ simcc_split_ratio;
69
79
scores.at <float >(i) = std::min (max_val_x.at <float >(i), max_val_y.at <float >(i));
70
80
71
81
if (scores.at <float >(i) <= 0 .f ) {
@@ -220,8 +230,22 @@ std::unique_ptr<ResultBase> KeypointDetectionModel::postprocess(InferenceResult&
220
230
float inverted_scale_x = static_cast <float >(image_data.inputImgWidth ) / netInputWidth,
221
231
inverted_scale_y = static_cast <float >(image_data.inputImgHeight ) / netInputHeight;
222
232
233
+ int pad_left = 0 , pad_top = 0 ;
234
+ if (RESIZE_KEEP_ASPECT == resizeMode || RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
235
+ inverted_scale_x = inverted_scale_y = std::max (inverted_scale_x, inverted_scale_y);
236
+ if (RESIZE_KEEP_ASPECT_LETTERBOX == resizeMode) {
237
+ pad_left = (netInputWidth -
238
+ static_cast <int >(std::round (static_cast <float >(image_data.inputImgWidth ) / inverted_scale_x))) /
239
+ 2 ;
240
+ pad_top = (netInputHeight -
241
+ static_cast <int >(std::round (static_cast <float >(image_data.inputImgHeight ) / inverted_scale_y))) /
242
+ 2 ;
243
+ }
244
+ }
245
+
223
246
result->poses .emplace_back (
224
- decode_simcc (pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, apply_softmax));
247
+ decode_simcc (pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, {pad_left, pad_top}, apply_softmax));
248
+
225
249
return std::unique_ptr<ResultBase>(result);
226
250
}
227
251
0 commit comments