Skip to content

Commit 060f658

Browse files
authored
Add softmax to keypoints postprocessing (#269)
* Add softmax to kp postprocessing * Fix softmax accuracy issue * Update cpp formatting
1 parent 3b37363 commit 060f658

File tree

6 files changed

+65
-17
lines changed

6 files changed

+65
-17
lines changed

src/cpp/models/include/models/keypoint_detection.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2024 Intel Corporation
2+
* Copyright (C) 2020-2025 Intel Corporation
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -38,6 +38,8 @@ class KeypointDetectionModel : public ImageModel {
3838
static std::string ModelType;
3939

4040
protected:
41+
bool apply_softmax = true;
42+
4143
void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
4244
void updateModelInfo() override;
4345
void init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority);

src/cpp/models/include/models/results.h

+5-1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#pragma once
77
#include <map>
88
#include <memory>
9+
#include <numeric>
910
#include <opencv2/core.hpp>
1011
#include <opencv2/imgproc.hpp>
1112
#include <openvino/openvino.hpp>
@@ -356,8 +357,11 @@ struct DetectedKeypoints {
356357
for (const cv::Point2f& keypoint : prediction.keypoints) {
357358
kp_x_sum += keypoint.x;
358359
}
360+
float scores_sum = std::accumulate(prediction.scores.begin(), prediction.scores.end(), 0.f);
361+
359362
os << "keypoints: (" << prediction.keypoints.size() << ", 2), keypoints_x_sum: ";
360-
os << std::fixed << std::setprecision(3) << kp_x_sum << ", scores: (" << prediction.scores.size() << ",)";
363+
os << std::fixed << std::setprecision(3) << kp_x_sum << ", scores: (" << prediction.scores.size() << ",) "
364+
<< std::fixed << std::setprecision(3) << scores_sum;
361365
return os;
362366
}
363367

src/cpp/models/src/keypoint_detection.cpp

+26-9
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020-2024 Intel Corporation
2+
* Copyright (C) 2020-2025 Intel Corporation
33
* SPDX-License-Identifier: Apache-2.0
44
*/
55

@@ -18,33 +18,48 @@
1818

1919
namespace {
2020

21-
void colArgMax(const cv::Mat& src, cv::Mat& dst_locs, cv::Mat& dst_values) {
21+
void colArgMax(const cv::Mat& src,
22+
cv::Mat& dst_locs,
23+
cv::Mat& dst_values,
24+
bool apply_softmax = false,
25+
float eps = 1e-6f) {
2226
dst_locs = cv::Mat::zeros(src.rows, 1, CV_32S);
2327
dst_values = cv::Mat::zeros(src.rows, 1, CV_32F);
2428

25-
for (int row = 0; row < src.rows; row++) {
29+
for (int row = 0; row < src.rows; ++row) {
2630
const float* ptr_row = src.ptr<float>(row);
2731
int max_val_idx = 0;
28-
dst_values.at<float>(row) = ptr_row[max_val_idx];
32+
float max_val = ptr_row[0];
2933
for (int col = 1; col < src.cols; ++col) {
30-
if (ptr_row[col] > ptr_row[max_val_idx]) {
34+
if (ptr_row[col] > max_val) {
3135
max_val_idx = col;
3236
dst_locs.at<int>(row) = max_val_idx;
33-
dst_values.at<float>(row) = ptr_row[col];
37+
max_val = ptr_row[col];
3438
}
3539
}
40+
41+
if (apply_softmax) {
42+
float sum = 0.0f;
43+
for (int col = 0; col < src.cols; ++col) {
44+
sum += exp(ptr_row[col] - max_val);
45+
}
46+
dst_values.at<float>(row) = exp(ptr_row[max_val_idx] - max_val) / (sum + eps);
47+
} else {
48+
dst_values.at<float>(row) = max_val;
49+
}
3650
}
3751
}
3852

3953
DetectedKeypoints decode_simcc(const cv::Mat& simcc_x,
4054
const cv::Mat& simcc_y,
4155
const cv::Point2f& extra_scale = cv::Point2f(1.f, 1.f),
56+
bool apply_softmax = false,
4257
float simcc_split_ratio = 2.0f) {
4358
cv::Mat x_locs, max_val_x;
44-
colArgMax(simcc_x, x_locs, max_val_x);
59+
colArgMax(simcc_x, x_locs, max_val_x, apply_softmax);
4560

4661
cv::Mat y_locs, max_val_y;
47-
colArgMax(simcc_y, y_locs, max_val_y);
62+
colArgMax(simcc_y, y_locs, max_val_y, apply_softmax);
4863

4964
std::vector<cv::Point2f> keypoints(x_locs.rows);
5065
cv::Mat scores = cv::Mat::zeros(x_locs.rows, 1, CV_32F);
@@ -67,6 +82,7 @@ std::string KeypointDetectionModel::ModelType = "keypoint_detection";
6782

6883
void KeypointDetectionModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
6984
labels = get_from_any_maps("labels", top_priority, mid_priority, labels);
85+
apply_softmax = get_from_any_maps("apply_softmax", top_priority, mid_priority, apply_softmax);
7086
}
7187

7288
KeypointDetectionModel::KeypointDetectionModel(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration)
@@ -204,7 +220,8 @@ std::unique_ptr<ResultBase> KeypointDetectionModel::postprocess(InferenceResult&
204220
float inverted_scale_x = static_cast<float>(image_data.inputImgWidth) / netInputWidth,
205221
inverted_scale_y = static_cast<float>(image_data.inputImgHeight) / netInputHeight;
206222

207-
result->poses.emplace_back(decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}));
223+
result->poses.emplace_back(
224+
decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, apply_softmax));
208225
return std::unique_ptr<ResultBase>(result);
209226
}
210227

src/python/model_api/models/keypoint_detection.py

+29-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) 2020-2024 Intel Corporation
2+
# Copyright (C) 2020-2025 Intel Corporation
33
# SPDX-License-Identifier: Apache-2.0
44
#
55

@@ -11,7 +11,7 @@
1111

1212
from .image_model import ImageModel
1313
from .result import DetectedKeypoints, DetectionResult
14-
from .types import ListValue
14+
from .types import BooleanValue, ListValue
1515

1616

1717
class KeypointDetectionModel(ImageModel):
@@ -30,6 +30,7 @@ def __init__(self, inference_adapter, configuration: dict = {}, preload=False):
3030
"""
3131
super().__init__(inference_adapter, configuration, preload)
3232
self._check_io_number(1, 2)
33+
self.apply_softmax: bool
3334

3435
def postprocess(
3536
self,
@@ -46,7 +47,11 @@ def postprocess(
4647
DetectedKeypoints: detected keypoints
4748
"""
4849
encoded_kps = list(outputs.values())
49-
batch_keypoints, batch_scores = _decode_simcc(*encoded_kps)
50+
batch_keypoints, batch_scores = _decode_simcc(
51+
encoded_kps[0],
52+
encoded_kps[1],
53+
apply_softmax=self.apply_softmax,
54+
)
5055
orig_h, orig_w = meta["original_shape"][:2]
5156
kp_scale_h = orig_h / self.h
5257
kp_scale_w = orig_w / self.w
@@ -63,6 +68,10 @@ def parameters(cls) -> dict:
6368
value_type=str,
6469
default_value=[],
6570
),
71+
"apply_softmax": BooleanValue(
72+
default_value=True,
73+
description="Whether to apply softmax on the heatmap.",
74+
),
6675
},
6776
)
6877
return parameters
@@ -119,21 +128,24 @@ def _decode_simcc(
119128
simcc_x: np.ndarray,
120129
simcc_y: np.ndarray,
121130
simcc_split_ratio: float = 2.0,
131+
apply_softmax: bool = False,
122132
) -> tuple[np.ndarray, np.ndarray]:
123133
"""Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space.
124134
125135
Args:
126136
simcc_x (np.ndarray): SimCC label for x-axis
127137
simcc_y (np.ndarray): SimCC label for y-axis
128138
simcc_split_ratio (float): The ratio of the label size to the input size.
139+
apply_softmax (bool): whether to apply softmax on the heatmap.
140+
Defaults to False.
129141
130142
Returns:
131143
tuple:
132144
- keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
133145
- scores (np.ndarray): The keypoint scores in shape (N, K).
134146
It usually represents the confidence of the keypoint prediction
135147
"""
136-
keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y)
148+
keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y, apply_softmax)
137149

138150
# Unsqueeze the instance dimension for single-instance results
139151
if keypoints.ndim == 2:
@@ -148,6 +160,8 @@ def _decode_simcc(
148160
def _get_simcc_maximum(
149161
simcc_x: np.ndarray,
150162
simcc_y: np.ndarray,
163+
apply_softmax: bool = False,
164+
softmax_eps: float = 1e-06,
151165
) -> tuple[np.ndarray, np.ndarray]:
152166
"""Get maximum response location and value from simcc representations.
153167
@@ -160,6 +174,10 @@ def _get_simcc_maximum(
160174
Args:
161175
simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx)
162176
simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
177+
apply_softmax (bool): whether to apply softmax on the heatmap.
178+
Defaults to False.
179+
softmax_eps (float): a constant to avoid division by zero in softmax.
180+
Defaults to 1e-6.
163181
164182
Returns:
165183
tuple:
@@ -185,6 +203,13 @@ def _get_simcc_maximum(
185203
else:
186204
batch_size = None
187205

206+
if apply_softmax:
207+
simcc_x = simcc_x - np.max(simcc_x, axis=1, keepdims=True)
208+
simcc_y = simcc_y - np.max(simcc_y, axis=1, keepdims=True)
209+
ex, ey = np.exp(simcc_x), np.exp(simcc_y)
210+
simcc_x = ex / (np.sum(ex, axis=1, keepdims=True) + softmax_eps)
211+
simcc_y = ey / (np.sum(ey, axis=1, keepdims=True) + softmax_eps)
212+
188213
x_locs = np.argmax(simcc_x, axis=1)
189214
y_locs = np.argmax(simcc_y, axis=1)
190215
locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)

src/python/model_api/models/result/keypoint.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -17,5 +17,5 @@ def __str__(self):
1717
return (
1818
f"keypoints: {self.keypoints.shape}, "
1919
f"keypoints_x_sum: {np.sum(self.keypoints[:, :1]):.3f}, "
20-
f"scores: {self.scores.shape}"
20+
f"scores: {self.scores.shape} {np.sum(self.scores):.3f}"
2121
)

tests/python/accuracy/public_scope.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@
425425
{
426426
"image": "coco128/images/train2017/000000000471.jpg",
427427
"reference": [
428-
"keypoints: (17, 2), keypoints_x_sum: 5700.000, scores: (17,)"
428+
"keypoints: (17, 2), keypoints_x_sum: 5700.000, scores: (17,) 0.049"
429429
]
430430
}
431431
]

0 commit comments

Comments
 (0)