Skip to content

Commit f0186cc

Browse files
committedMar 6, 2025
Fix kp postprocessing
1 parent 74a2b27 commit f0186cc

File tree

1 file changed

+52
-20
lines changed

1 file changed

+52
-20
lines changed
 

‎model_api/python/model_api/models/keypoint_detection.py

+52-20
Original file line numberDiff line numberDiff line change
@@ -26,30 +26,29 @@
2626

2727

2828
class KeypointDetectionModel(ImageModel):
29-
"""
30-
A wrapper that implements a basic keypoint regression model.
31-
"""
29+
"""A wrapper that implements a basic keypoint regression model."""
3230

3331
__model__ = "keypoint_detection"
3432

35-
def __init__(self, inference_adapter, configuration=dict(), preload=False):
36-
"""
37-
Initializes the keypoint detection model.
33+
def __init__(self, inference_adapter, configuration: dict = {}, preload=False):
34+
"""Initializes the keypoint detection model.
3835
3936
Args:
4037
inference_adapter (InferenceAdapter): inference adapter containing the underlying model.
4138
configuration (dict, optional): configuration overrides the model parameters (see parameters() method).
42-
Defaults to dict().
39+
Defaults to {}.
4340
preload (bool, optional): forces inference adapter to load the model. Defaults to False.
4441
"""
4542
super().__init__(inference_adapter, configuration, preload)
4643
self._check_io_number(1, 2)
44+
self.apply_softmax: bool
4745

4846
def postprocess(
49-
self, outputs: dict[str, np.ndarray], meta: dict[str, Any]
47+
self,
48+
outputs: dict[str, np.ndarray],
49+
meta: dict[str, Any],
5050
) -> DetectedKeypoints:
51-
"""
52-
Applies SCC decoded to the model outputs.
51+
"""Applies SimCC decoding to the model outputs.
5352
5453
Args:
5554
outputs (dict[str, np.ndarray]): raw outputs of the model
@@ -60,12 +59,26 @@ def postprocess(
6059
"""
6160
encoded_kps = list(outputs.values())
6261
batch_keypoints, batch_scores = _decode_simcc(
63-
*encoded_kps, apply_softmax=self.apply_softmax
62+
encoded_kps[0],
63+
encoded_kps[1],
64+
apply_softmax=self.apply_softmax,
6465
)
6566
orig_h, orig_w = meta["original_shape"][:2]
6667
kp_scale_h = orig_h / self.h
6768
kp_scale_w = orig_w / self.w
68-
batch_keypoints = batch_keypoints.squeeze() * np.array([kp_scale_w, kp_scale_h])
69+
70+
batch_keypoints = batch_keypoints.squeeze()
71+
72+
if self.resize_type in ["fit_to_window", "fit_to_window_letterbox"]:
73+
inverted_scale = max(kp_scale_h, kp_scale_w)
74+
kp_scale_h = kp_scale_w = inverted_scale
75+
if self.resize_type == "fit_to_window_letterbox":
76+
pad_left = (self.w - round(orig_w / inverted_scale)) // 2
77+
pad_top = (self.h - round(orig_h / inverted_scale)) // 2
78+
batch_keypoints -= np.array([pad_left, pad_top])
79+
80+
batch_keypoints *= np.array([kp_scale_w, kp_scale_h])
81+
6982
return DetectedKeypoints(batch_keypoints, batch_scores.squeeze())
7083

7184
@classmethod
@@ -74,13 +87,15 @@ def parameters(cls) -> dict:
7487
parameters.update(
7588
{
7689
"labels": ListValue(
77-
description="List of class labels", value_type=str, default_value=[]
90+
description="List of class labels",
91+
value_type=str,
92+
default_value=[],
7893
),
7994
"apply_softmax": BooleanValue(
8095
default_value=True,
8196
description="Whether to apply softmax on the heatmap.",
8297
),
83-
}
98+
},
8499
)
85100
return parameters
86101

@@ -137,23 +152,33 @@ def _decode_simcc(
137152
simcc_y: np.ndarray,
138153
simcc_split_ratio: float = 2.0,
139154
apply_softmax: bool = False,
155+
decode_beta: float = 150.0,
156+
sigma: float | int = 6.0,
140157
) -> tuple[np.ndarray, np.ndarray]:
141158
"""Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space.
142159
143160
Args:
144161
simcc_x (np.ndarray): SimCC label for x-axis
145162
simcc_y (np.ndarray): SimCC label for y-axis
146163
simcc_split_ratio (float): The ratio of the label size to the input size.
147-
apply_softmax (bool): whether to apply softmax on the heatmap.
164+
apply_softmax (bool): whether to apply softmax during scores generation.
148165
Defaults to False.
166+
decode_beta (float): The beta value for decoding scores with softmax. Defaults
167+
to 150.0.
168+
sigma (float | int): The sigma value in the Gaussian SimCC
169+
label. Defaults to 6.0.
149170
150171
Returns:
151172
tuple:
152173
- keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
153174
- scores (np.ndarray): The keypoint scores in shape (N, K).
154175
It usually represents the confidence of the keypoint prediction
155176
"""
156-
keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y, apply_softmax)
177+
keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y)
178+
if apply_softmax:
179+
_, scores = _get_simcc_maximum(
180+
decode_beta * sigma * simcc_x, decode_beta * sigma * simcc_y, apply_softmax
181+
)
157182

158183
# Unsqueeze the instance dimension for single-instance results
159184
if keypoints.ndim == 2:
@@ -169,6 +194,7 @@ def _get_simcc_maximum(
169194
simcc_x: np.ndarray,
170195
simcc_y: np.ndarray,
171196
apply_softmax: bool = False,
197+
softmax_eps: float = 1e-06,
172198
) -> tuple[np.ndarray, np.ndarray]:
173199
"""Get maximum response location and value from simcc representations.
174200
@@ -183,6 +209,8 @@ def _get_simcc_maximum(
183209
simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
184210
apply_softmax (bool): whether to apply softmax on the heatmap.
185211
Defaults to False.
212+
softmax_eps (float): a constant to avoid division by zero in softmax.
213+
Defaults to 1e-6.
186214
187215
Returns:
188216
tuple:
@@ -212,17 +240,21 @@ def _get_simcc_maximum(
212240
simcc_x = simcc_x - np.max(simcc_x, axis=1, keepdims=True)
213241
simcc_y = simcc_y - np.max(simcc_y, axis=1, keepdims=True)
214242
ex, ey = np.exp(simcc_x), np.exp(simcc_y)
215-
simcc_x = ex / np.sum(ex, axis=1, keepdims=True)
216-
simcc_y = ey / np.sum(ey, axis=1, keepdims=True)
243+
simcc_x = ex / (np.sum(ex, axis=1, keepdims=True) + softmax_eps)
244+
simcc_y = ey / (np.sum(ey, axis=1, keepdims=True) + softmax_eps)
217245

218246
x_locs = np.argmax(simcc_x, axis=1)
219247
y_locs = np.argmax(simcc_y, axis=1)
220248
locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)
221249
max_val_x = np.take_along_axis(
222-
simcc_x, np.expand_dims(x_locs, axis=-1), axis=-1
250+
simcc_x,
251+
np.expand_dims(x_locs, axis=-1),
252+
axis=-1,
223253
).squeeze(axis=-1)
224254
max_val_y = np.take_along_axis(
225-
simcc_y, np.expand_dims(y_locs, axis=-1), axis=-1
255+
simcc_y,
256+
np.expand_dims(y_locs, axis=-1),
257+
axis=-1,
226258
).squeeze(axis=-1)
227259

228260
mask = max_val_x > max_val_y

0 commit comments

Comments
 (0)