From f0186ccd5f46ecdfeea8f7fc6d44b7054e47e54b Mon Sep 17 00:00:00 2001 From: Vladisalv Sovrasov Date: Thu, 6 Mar 2025 22:52:21 +0900 Subject: [PATCH 1/3] Fix kp postprocessing --- .../model_api/models/keypoint_detection.py | 72 +++++++++++++------ 1 file changed, 52 insertions(+), 20 deletions(-) diff --git a/model_api/python/model_api/models/keypoint_detection.py b/model_api/python/model_api/models/keypoint_detection.py index e3e8a86f..473e01a4 100644 --- a/model_api/python/model_api/models/keypoint_detection.py +++ b/model_api/python/model_api/models/keypoint_detection.py @@ -26,30 +26,29 @@ class KeypointDetectionModel(ImageModel): - """ - A wrapper that implements a basic keypoint regression model. - """ + """A wrapper that implements a basic keypoint regression model.""" __model__ = "keypoint_detection" - def __init__(self, inference_adapter, configuration=dict(), preload=False): - """ - Initializes the keypoint detection model. + def __init__(self, inference_adapter, configuration: dict = {}, preload=False): + """Initializes the keypoint detection model. Args: inference_adapter (InferenceAdapter): inference adapter containing the underlying model. configuration (dict, optional): configuration overrides the model parameters (see parameters() method). - Defaults to dict(). + Defaults to {}. preload (bool, optional): forces inference adapter to load the model. Defaults to False. """ super().__init__(inference_adapter, configuration, preload) self._check_io_number(1, 2) + self.apply_softmax: bool def postprocess( - self, outputs: dict[str, np.ndarray], meta: dict[str, Any] + self, + outputs: dict[str, np.ndarray], + meta: dict[str, Any], ) -> DetectedKeypoints: - """ - Applies SCC decoded to the model outputs. + """Applies SimCC decoding to the model outputs. 
Args: outputs (dict[str, np.ndarray]): raw outputs of the model @@ -60,12 +59,26 @@ def postprocess( """ encoded_kps = list(outputs.values()) batch_keypoints, batch_scores = _decode_simcc( - *encoded_kps, apply_softmax=self.apply_softmax + encoded_kps[0], + encoded_kps[1], + apply_softmax=self.apply_softmax, ) orig_h, orig_w = meta["original_shape"][:2] kp_scale_h = orig_h / self.h kp_scale_w = orig_w / self.w - batch_keypoints = batch_keypoints.squeeze() * np.array([kp_scale_w, kp_scale_h]) + + batch_keypoints = batch_keypoints.squeeze() + + if self.resize_type in ["fit_to_window", "fit_to_window_letterbox"]: + inverted_scale = max(kp_scale_h, kp_scale_w) + kp_scale_h = kp_scale_w = inverted_scale + if self.resize_type == "fit_to_window_letterbox": + pad_left = (self.w - round(orig_w / inverted_scale)) // 2 + pad_top = (self.h - round(orig_h / inverted_scale)) // 2 + batch_keypoints -= np.array([pad_left, pad_top]) + + batch_keypoints *= np.array([kp_scale_w, kp_scale_h]) + return DetectedKeypoints(batch_keypoints, batch_scores.squeeze()) @classmethod @@ -74,13 +87,15 @@ def parameters(cls) -> dict: parameters.update( { "labels": ListValue( - description="List of class labels", value_type=str, default_value=[] + description="List of class labels", + value_type=str, + default_value=[], ), "apply_softmax": BooleanValue( default_value=True, description="Whether to apply softmax on the heatmap.", ), - } + }, ) return parameters @@ -137,6 +152,8 @@ def _decode_simcc( simcc_y: np.ndarray, simcc_split_ratio: float = 2.0, apply_softmax: bool = False, + decode_beta: float = 150.0, + sigma: float | int = 6.0, ) -> tuple[np.ndarray, np.ndarray]: """Decodes keypoint coordinates from SimCC representations. The decoded coordinates are in the input image space. @@ -144,8 +161,12 @@ def _decode_simcc( simcc_x (np.ndarray): SimCC label for x-axis simcc_y (np.ndarray): SimCC label for y-axis simcc_split_ratio (float): The ratio of the label size to the input size. 
- apply_softmax (bool): whether to apply softmax on the heatmap. + apply_softmax (bool): whether to apply softmax during scores generation. Defaults to False. + decode_beta (float): The beta value for decoding scores with softmax. Defaults + to 150.0. + sigma (float | int): The sigma value in the Gaussian SimCC + label. Defaults to 6.0. Returns: tuple: @@ -153,7 +174,11 @@ def _decode_simcc( - scores (np.ndarray): The keypoint scores in shape (N, K). It usually represents the confidence of the keypoint prediction """ - keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y, apply_softmax) + keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y) + if apply_softmax: + _, scores = _get_simcc_maximum( + decode_beta * sigma * simcc_x, decode_beta * sigma * simcc_y, apply_softmax + ) # Unsqueeze the instance dimension for single-instance results if keypoints.ndim == 2: @@ -169,6 +194,7 @@ def _get_simcc_maximum( simcc_x: np.ndarray, simcc_y: np.ndarray, apply_softmax: bool = False, + softmax_eps: float = 1e-06, ) -> tuple[np.ndarray, np.ndarray]: """Get maximum response location and value from simcc representations. @@ -183,6 +209,8 @@ def _get_simcc_maximum( simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy) apply_softmax (bool): whether to apply softmax on the heatmap. Defaults to False. + softmax_eps (float): a constant to avoid division by zero in softmax. + Defaults to 1e-6. 
Returns: tuple: @@ -212,17 +240,21 @@ def _get_simcc_maximum( simcc_x = simcc_x - np.max(simcc_x, axis=1, keepdims=True) simcc_y = simcc_y - np.max(simcc_y, axis=1, keepdims=True) ex, ey = np.exp(simcc_x), np.exp(simcc_y) - simcc_x = ex / np.sum(ex, axis=1, keepdims=True) - simcc_y = ey / np.sum(ey, axis=1, keepdims=True) + simcc_x = ex / (np.sum(ex, axis=1, keepdims=True) + softmax_eps) + simcc_y = ey / (np.sum(ey, axis=1, keepdims=True) + softmax_eps) x_locs = np.argmax(simcc_x, axis=1) y_locs = np.argmax(simcc_y, axis=1) locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32) max_val_x = np.take_along_axis( - simcc_x, np.expand_dims(x_locs, axis=-1), axis=-1 + simcc_x, + np.expand_dims(x_locs, axis=-1), + axis=-1, ).squeeze(axis=-1) max_val_y = np.take_along_axis( - simcc_y, np.expand_dims(y_locs, axis=-1), axis=-1 + simcc_y, + np.expand_dims(y_locs, axis=-1), + axis=-1, ).squeeze(axis=-1) mask = max_val_x > max_val_y From a7a2409ca31f980b2b2f5b4d515e64814f47db78 Mon Sep 17 00:00:00 2001 From: Vladisalv Sovrasov Date: Thu, 6 Mar 2025 22:58:05 +0900 Subject: [PATCH 2/3] Bump version, fix ov deps --- model_api/python/pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/model_api/python/pyproject.toml b/model_api/python/pyproject.toml index 716053e4..96f436e4 100644 --- a/model_api/python/pyproject.toml +++ b/model_api/python/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] name = "openvino_model_api" -version = "0.2.5.1" +version = "0.2.5.2" requires-python = ">=3.9" authors = [ {name = "Intel(R) Corporation"}, @@ -26,8 +26,8 @@ dependencies = [ "numpy>=1.16.6", "opencv-python", "scipy>=1.5.4", - "openvino>=2024.0", - "openvino-dev>=2024.0", + "openvino==2024.*", + "openvino-dev==2024.*", "omz_tools @ git+https://github.com/openvinotoolkit/open_model_zoo.git@master#egg=omz_tools&subdirectory=tools/model_tools", ] From 372778bf41036594adadba21a4f816a7d75ead3b Mon Sep 17 00:00:00 2001 From: 
Vladisalv Sovrasov Date: Thu, 6 Mar 2025 23:07:52 +0900 Subject: [PATCH 3/3] Update kp refs --- tests/python/accuracy/public_scope.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/accuracy/public_scope.json b/tests/python/accuracy/public_scope.json index e9115dde..cac68e8e 100644 --- a/tests/python/accuracy/public_scope.json +++ b/tests/python/accuracy/public_scope.json @@ -429,7 +429,7 @@ "test_data": [ { "image": "coco128/images/train2017/000000000471.jpg", - "reference": ["keypoints: (17, 2), keypoints_x_sum: 5700.000, scores: (17,)"] + "reference": ["keypoints: (17, 2), keypoints_x_sum: 2930.000, scores: (17,)"] } ] },