21
21
import numpy as np
22
22
23
23
from .image_model import ImageModel
24
- from .types import ListValue
24
+ from .types import BooleanValue , ListValue
25
25
from .utils import DetectedKeypoints , Detection
26
26
27
27
@@ -59,7 +59,9 @@ def postprocess(
59
59
DetectedKeypoints: detected keypoints
60
60
"""
61
61
encoded_kps = list (outputs .values ())
62
- batch_keypoints , batch_scores = _decode_simcc (* encoded_kps )
62
+ batch_keypoints , batch_scores = _decode_simcc (
63
+ * encoded_kps , apply_softmax = self .apply_softmax
64
+ )
63
65
orig_h , orig_w = meta ["original_shape" ][:2 ]
64
66
kp_scale_h = orig_h / self .h
65
67
kp_scale_w = orig_w / self .w
@@ -74,6 +76,10 @@ def parameters(cls) -> dict:
74
76
"labels" : ListValue (
75
77
description = "List of class labels" , value_type = str , default_value = []
76
78
),
79
+ "apply_softmax" : BooleanValue (
80
+ default_value = True ,
81
+ description = "Whether to apply softmax on the heatmap." ,
82
+ ),
77
83
}
78
84
)
79
85
return parameters
@@ -127,22 +133,27 @@ def predict_crops(self, crops: list[np.ndarray]) -> list[DetectedKeypoints]:
127
133
128
134
129
135
def _decode_simcc (
130
- simcc_x : np .ndarray , simcc_y : np .ndarray , simcc_split_ratio : float = 2.0
136
+ simcc_x : np .ndarray ,
137
+ simcc_y : np .ndarray ,
138
+ simcc_split_ratio : float = 2.0 ,
139
+ apply_softmax : bool = False ,
131
140
) -> tuple [np .ndarray , np .ndarray ]:
132
141
"""Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space.
133
142
134
143
Args:
135
144
simcc_x (np.ndarray): SimCC label for x-axis
136
145
simcc_y (np.ndarray): SimCC label for y-axis
137
146
simcc_split_ratio (float): The ratio of the label size to the input size.
147
+ apply_softmax (bool): whether to apply softmax on the heatmap.
148
+ Defaults to False.
138
149
139
150
Returns:
140
151
tuple:
141
152
- keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
142
153
- scores (np.ndarray): The keypoint scores in shape (N, K).
143
154
It usually represents the confidence of the keypoint prediction
144
155
"""
145
- keypoints , scores = _get_simcc_maximum (simcc_x , simcc_y )
156
+ keypoints , scores = _get_simcc_maximum (simcc_x , simcc_y , apply_softmax )
146
157
147
158
# Unsqueeze the instance dimension for single-instance results
148
159
if keypoints .ndim == 2 :
@@ -157,6 +168,7 @@ def _decode_simcc(
157
168
def _get_simcc_maximum (
158
169
simcc_x : np .ndarray ,
159
170
simcc_y : np .ndarray ,
171
+ apply_softmax : bool = False ,
160
172
) -> tuple [np .ndarray , np .ndarray ]:
161
173
"""Get maximum response location and value from simcc representations.
162
174
@@ -169,6 +181,8 @@ def _get_simcc_maximum(
169
181
Args:
170
182
simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx)
171
183
simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
184
+ apply_softmax (bool): whether to apply softmax on the heatmap.
185
+ Defaults to False.
172
186
173
187
Returns:
174
188
tuple:
@@ -194,6 +208,13 @@ def _get_simcc_maximum(
194
208
else :
195
209
batch_size = None
196
210
211
+ if apply_softmax :
212
+ simcc_x = simcc_x - np .max (simcc_x , axis = 1 , keepdims = True )
213
+ simcc_y = simcc_y - np .max (simcc_y , axis = 1 , keepdims = True )
214
+ ex , ey = np .exp (simcc_x ), np .exp (simcc_y )
215
+ simcc_x = ex / np .sum (ex , axis = 1 , keepdims = True )
216
+ simcc_y = ey / np .sum (ey , axis = 1 , keepdims = True )
217
+
197
218
x_locs = np .argmax (simcc_x , axis = 1 )
198
219
y_locs = np .argmax (simcc_y , axis = 1 )
199
220
locs = np .stack ((x_locs , y_locs ), axis = - 1 ).astype (np .float32 )
0 commit comments