26
26
27
27
28
28
class KeypointDetectionModel (ImageModel ):
29
- """
30
- A wrapper that implements a basic keypoint regression model.
31
- """
29
+ """A wrapper that implements a basic keypoint regression model."""
32
30
33
31
__model__ = "keypoint_detection"
34
32
35
- def __init__ (self , inference_adapter , configuration = dict (), preload = False ):
36
- """
37
- Initializes the keypoint detection model.
33
+ def __init__ (self , inference_adapter , configuration : dict = {}, preload = False ):
34
+ """Initializes the keypoint detection model.
38
35
39
36
Args:
40
37
inference_adapter (InferenceAdapter): inference adapter containing the underlying model.
41
38
configuration (dict, optional): configuration overrides the model parameters (see parameters() method).
42
- Defaults to dict() .
39
+ Defaults to {} .
43
40
preload (bool, optional): forces inference adapter to load the model. Defaults to False.
44
41
"""
45
42
super ().__init__ (inference_adapter , configuration , preload )
46
43
self ._check_io_number (1 , 2 )
44
+ self .apply_softmax : bool
47
45
48
46
def postprocess (
49
- self , outputs : dict [str , np .ndarray ], meta : dict [str , Any ]
47
+ self ,
48
+ outputs : dict [str , np .ndarray ],
49
+ meta : dict [str , Any ],
50
50
) -> DetectedKeypoints :
51
- """
52
- Applies SCC decoded to the model outputs.
51
+ """Applies SimCC decoding to the model outputs.
53
52
54
53
Args:
55
54
outputs (dict[str, np.ndarray]): raw outputs of the model
@@ -60,12 +59,26 @@ def postprocess(
60
59
"""
61
60
encoded_kps = list (outputs .values ())
62
61
batch_keypoints , batch_scores = _decode_simcc (
63
- * encoded_kps , apply_softmax = self .apply_softmax
62
+ encoded_kps [0 ],
63
+ encoded_kps [1 ],
64
+ apply_softmax = self .apply_softmax ,
64
65
)
65
66
orig_h , orig_w = meta ["original_shape" ][:2 ]
66
67
kp_scale_h = orig_h / self .h
67
68
kp_scale_w = orig_w / self .w
68
- batch_keypoints = batch_keypoints .squeeze () * np .array ([kp_scale_w , kp_scale_h ])
69
+
70
+ batch_keypoints = batch_keypoints .squeeze ()
71
+
72
+ if self .resize_type in ["fit_to_window" , "fit_to_window_letterbox" ]:
73
+ inverted_scale = max (kp_scale_h , kp_scale_w )
74
+ kp_scale_h = kp_scale_w = inverted_scale
75
+ if self .resize_type == "fit_to_window_letterbox" :
76
+ pad_left = (self .w - round (orig_w / inverted_scale )) // 2
77
+ pad_top = (self .h - round (orig_h / inverted_scale )) // 2
78
+ batch_keypoints -= np .array ([pad_left , pad_top ])
79
+
80
+ batch_keypoints *= np .array ([kp_scale_w , kp_scale_h ])
81
+
69
82
return DetectedKeypoints (batch_keypoints , batch_scores .squeeze ())
70
83
71
84
@classmethod
@@ -74,13 +87,15 @@ def parameters(cls) -> dict:
74
87
parameters .update (
75
88
{
76
89
"labels" : ListValue (
77
- description = "List of class labels" , value_type = str , default_value = []
90
+ description = "List of class labels" ,
91
+ value_type = str ,
92
+ default_value = [],
78
93
),
79
94
"apply_softmax" : BooleanValue (
80
95
default_value = True ,
81
96
description = "Whether to apply softmax on the heatmap." ,
82
97
),
83
- }
98
+ },
84
99
)
85
100
return parameters
86
101
@@ -137,23 +152,33 @@ def _decode_simcc(
137
152
simcc_y : np .ndarray ,
138
153
simcc_split_ratio : float = 2.0 ,
139
154
apply_softmax : bool = False ,
155
+ decode_beta : float = 150.0 ,
156
+ sigma : float | int = 6.0 ,
140
157
) -> tuple [np .ndarray , np .ndarray ]:
141
158
"""Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space.
142
159
143
160
Args:
144
161
simcc_x (np.ndarray): SimCC label for x-axis
145
162
simcc_y (np.ndarray): SimCC label for y-axis
146
163
simcc_split_ratio (float): The ratio of the label size to the input size.
147
- apply_softmax (bool): whether to apply softmax on the heatmap .
164
+ apply_softmax (bool): whether to apply softmax during scores generation .
148
165
Defaults to False.
166
+ decode_beta (float): The beta value for decoding scores with softmax. Defaults
167
+ to 150.0.
168
+ sigma (float | int): The sigma value in the Gaussian SimCC
169
+ label. Defaults to 6.0.
149
170
150
171
Returns:
151
172
tuple:
152
173
- keypoints (np.ndarray): Decoded coordinates in shape (N, K, D)
153
174
- scores (np.ndarray): The keypoint scores in shape (N, K).
154
175
It usually represents the confidence of the keypoint prediction
155
176
"""
156
- keypoints , scores = _get_simcc_maximum (simcc_x , simcc_y , apply_softmax )
177
+ keypoints , scores = _get_simcc_maximum (simcc_x , simcc_y )
178
+ if apply_softmax :
179
+ _ , scores = _get_simcc_maximum (
180
+ decode_beta * sigma * simcc_x , decode_beta * sigma * simcc_y , apply_softmax
181
+ )
157
182
158
183
# Unsqueeze the instance dimension for single-instance results
159
184
if keypoints .ndim == 2 :
@@ -169,6 +194,7 @@ def _get_simcc_maximum(
169
194
simcc_x : np .ndarray ,
170
195
simcc_y : np .ndarray ,
171
196
apply_softmax : bool = False ,
197
+ softmax_eps : float = 1e-06 ,
172
198
) -> tuple [np .ndarray , np .ndarray ]:
173
199
"""Get maximum response location and value from simcc representations.
174
200
@@ -183,6 +209,8 @@ def _get_simcc_maximum(
183
209
simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
184
210
apply_softmax (bool): whether to apply softmax on the heatmap.
185
211
Defaults to False.
212
+ softmax_eps (float): a constant to avoid division by zero in softmax.
213
+ Defaults to 1e-6.
186
214
187
215
Returns:
188
216
tuple:
@@ -212,17 +240,21 @@ def _get_simcc_maximum(
212
240
simcc_x = simcc_x - np .max (simcc_x , axis = 1 , keepdims = True )
213
241
simcc_y = simcc_y - np .max (simcc_y , axis = 1 , keepdims = True )
214
242
ex , ey = np .exp (simcc_x ), np .exp (simcc_y )
215
- simcc_x = ex / np .sum (ex , axis = 1 , keepdims = True )
216
- simcc_y = ey / np .sum (ey , axis = 1 , keepdims = True )
243
+ simcc_x = ex / ( np .sum (ex , axis = 1 , keepdims = True ) + softmax_eps )
244
+ simcc_y = ey / ( np .sum (ey , axis = 1 , keepdims = True ) + softmax_eps )
217
245
218
246
x_locs = np .argmax (simcc_x , axis = 1 )
219
247
y_locs = np .argmax (simcc_y , axis = 1 )
220
248
locs = np .stack ((x_locs , y_locs ), axis = - 1 ).astype (np .float32 )
221
249
max_val_x = np .take_along_axis (
222
- simcc_x , np .expand_dims (x_locs , axis = - 1 ), axis = - 1
250
+ simcc_x ,
251
+ np .expand_dims (x_locs , axis = - 1 ),
252
+ axis = - 1 ,
223
253
).squeeze (axis = - 1 )
224
254
max_val_y = np .take_along_axis (
225
- simcc_y , np .expand_dims (y_locs , axis = - 1 ), axis = - 1
255
+ simcc_y ,
256
+ np .expand_dims (y_locs , axis = - 1 ),
257
+ axis = - 1 ,
226
258
).squeeze (axis = - 1 )
227
259
228
260
mask = max_val_x > max_val_y
0 commit comments