diff --git a/model_api/cpp/models/include/models/keypoint_detection.h b/model_api/cpp/models/include/models/keypoint_detection.h
index edc89de8..f4a31be7 100644
--- a/model_api/cpp/models/include/models/keypoint_detection.h
+++ b/model_api/cpp/models/include/models/keypoint_detection.h
@@ -45,6 +45,7 @@ class KeypointDetectionModel : public ImageModel {
     static std::string ModelType;
 
 protected:
+    bool apply_softmax = true;  // when true, SimCC outputs are softmax-normalised before decoding
     void prepareInputsOutputs(std::shared_ptr<ov::Model>& model) override;
     void updateModelInfo() override;
 
diff --git a/model_api/cpp/models/src/keypoint_detection.cpp b/model_api/cpp/models/src/keypoint_detection.cpp
index 8247e2f7..55a77f75 100644
--- a/model_api/cpp/models/src/keypoint_detection.cpp
+++ b/model_api/cpp/models/src/keypoint_detection.cpp
@@ -34,7 +34,7 @@ void colArgMax(const cv::Mat& src, cv::Mat& dst_locs, cv::Mat& dst_values) {
     dst_locs = cv::Mat::zeros(src.rows, 1, CV_32S);
     dst_values = cv::Mat::zeros(src.rows, 1, CV_32F);
 
-    for (int row = 0; row < src.rows; row++) {
+    for (int row = 0; row < src.rows; ++row) {
         const float *ptr_row = src.ptr<float>(row);
         int max_val_idx = 0;
         dst_values.at<float>(row) = ptr_row[max_val_idx];
@@ -48,9 +48,48 @@ void colArgMax(const cv::Mat& src, cv::Mat& dst_locs, cv::Mat& dst_values) {
     }
 }
 
-DetectedKeypoints decode_simcc(const cv::Mat& simcc_x, const cv::Mat& simcc_y,
+
+// Applies a softmax independently to every row of `src` and returns the result.
+// Rows are read through float pointers, so `src` is expected to hold float data.
+// The row maximum is subtracted before exponentiation to keep exp() from overflowing.
+// `src` itself is not modified: the computation runs on a clone.
+cv::Mat softmax_row(const cv::Mat& src) {
+    cv::Mat result = src.clone();
+
+    for (int row = 0; row < result.rows; ++row) {
+        float* ptr_row = result.ptr<float>(row);
+        float max_val = ptr_row[0];
+        for (int col = 1; col < result.cols; ++col) {
+            if (ptr_row[col] > max_val) {
+                max_val = ptr_row[col];
+            }
+        }
+        float sum = 0.0f;
+        for (int col = 0; col < result.cols; ++col) {
+            ptr_row[col] = exp(ptr_row[col] - max_val);
+            sum += ptr_row[col];
+        }
+        for (int col = 0; col < result.cols; ++col) {
+            ptr_row[col] /= sum;
+        }
+    }
+
+    return result;
+}
+
+
+DetectedKeypoints decode_simcc(const cv::Mat& simcc_x_input, const cv::Mat& simcc_y_input,
                                const cv::Point2f& extra_scale = cv::Point2f(1.f, 1.f),
-                               float simcc_split_ratio = 2.0f) {
+                               float simcc_split_ratio = 2.0f,
+                               bool apply_softmax = false) {
+    cv::Mat simcc_x = simcc_x_input;
+    cv::Mat simcc_y = simcc_y_input;
+
+    if (apply_softmax) {
+        simcc_x = softmax_row(simcc_x);
+        simcc_y = softmax_row(simcc_y);  // each axis is normalised from its own logits
+    }
+
     cv::Mat x_locs, max_val_x;
     colArgMax(simcc_x, x_locs, max_val_x);
 
@@ -77,6 +116,7 @@ std::string KeypointDetectionModel::ModelType = "keypoint_detection";
 
 void KeypointDetectionModel::init_from_config(const ov::AnyMap& top_priority, const ov::AnyMap& mid_priority) {
     labels = get_from_any_maps("labels", top_priority, mid_priority, labels);
+    apply_softmax = get_from_any_maps("apply_softmax", top_priority, mid_priority, apply_softmax);
 }
 
 KeypointDetectionModel::KeypointDetectionModel(std::shared_ptr<ov::Model>& model, const ov::AnyMap& configuration) : ImageModel(model, configuration) {
@@ -200,7 +240,7 @@ std::unique_ptr<ResultBase> KeypointDetectionModel::postprocess(InferenceResult&
     float inverted_scale_x = static_cast<float>(image_data.inputImgWidth) / netInputWidth,
           inverted_scale_y = static_cast<float>(image_data.inputImgHeight) / netInputHeight;
 
-    result->poses.emplace_back(decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}));
+    result->poses.emplace_back(decode_simcc(pred_x_mat, pred_y_mat, {inverted_scale_x, inverted_scale_y}, /*simcc_split_ratio=*/2.0f, apply_softmax));
     return std::unique_ptr<ResultBase>(result);
 }
 
diff --git a/model_api/python/model_api/models/keypoint_detection.py b/model_api/python/model_api/models/keypoint_detection.py
index ebe70753..e3e8a86f 100644
--- a/model_api/python/model_api/models/keypoint_detection.py
+++ b/model_api/python/model_api/models/keypoint_detection.py
@@ -21,7 +21,7 @@
 import numpy as np
 
 from .image_model import ImageModel
-from .types import ListValue
+from .types import BooleanValue, ListValue
 from .utils import DetectedKeypoints, Detection
 
 
@@ -59,7 +59,9 @@ def postprocess(
             DetectedKeypoints: detected keypoints
         """
         encoded_kps = list(outputs.values())
-        batch_keypoints, batch_scores = _decode_simcc(*encoded_kps)
+        batch_keypoints, batch_scores = _decode_simcc(
+            *encoded_kps, apply_softmax=self.apply_softmax
+        )
         orig_h, orig_w = meta["original_shape"][:2]
         kp_scale_h = orig_h / self.h
         kp_scale_w = orig_w / self.w
@@ -74,6 +76,10 @@ def parameters(cls) -> dict:
                 "labels": ListValue(
                     description="List of class labels", value_type=str, default_value=[]
                 ),
+                "apply_softmax": BooleanValue(
+                    default_value=True,
+                    description="Whether to apply softmax on the heatmap.",
+                ),
             }
         )
         return parameters
@@ -127,7 +133,10 @@ def predict_crops(self, crops: list[np.ndarray]) -> list[DetectedKeypoints]:
 
 
 def _decode_simcc(
-    simcc_x: np.ndarray, simcc_y: np.ndarray, simcc_split_ratio: float = 2.0
+    simcc_x: np.ndarray,
+    simcc_y: np.ndarray,
+    simcc_split_ratio: float = 2.0,
+    apply_softmax: bool = False,
 ) -> tuple[np.ndarray, np.ndarray]:
     """Decodes keypoint coordinates from SimCC representations. The decoded
     coordinates are in the input image space.
@@ -135,6 +144,8 @@ def _decode_simcc(
         simcc_x (np.ndarray): SimCC label for x-axis
         simcc_y (np.ndarray): SimCC label for y-axis
         simcc_split_ratio (float): The ratio of the label size to the input size.
+        apply_softmax (bool): whether to apply softmax on the heatmap.
+            Defaults to False.
 
     Returns:
         tuple:
@@ -142,7 +153,7 @@ def _decode_simcc(
         - scores (np.ndarray): The keypoint scores in shape (N, K).
             It usually represents the confidence of the keypoint prediction
     """
-    keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y)
+    keypoints, scores = _get_simcc_maximum(simcc_x, simcc_y, apply_softmax)
 
     # Unsqueeze the instance dimension for single-instance results
     if keypoints.ndim == 2:
@@ -157,6 +168,7 @@ def _decode_simcc(
 def _get_simcc_maximum(
     simcc_x: np.ndarray,
     simcc_y: np.ndarray,
+    apply_softmax: bool = False,
 ) -> tuple[np.ndarray, np.ndarray]:
     """Get maximum response location and value from simcc representations.
 
@@ -169,6 +181,8 @@ def _get_simcc_maximum(
     Args:
         simcc_x (np.ndarray): x-axis SimCC in shape (K, Wx) or (N, K, Wx)
         simcc_y (np.ndarray): y-axis SimCC in shape (K, Hy) or (N, K, Hy)
+        apply_softmax (bool): whether to apply softmax on the heatmap.
+            Defaults to False.
 
     Returns:
         tuple:
@@ -194,6 +208,13 @@ def _get_simcc_maximum(
     else:
         batch_size = None
 
+    if apply_softmax:
+        simcc_x = simcc_x - np.max(simcc_x, axis=1, keepdims=True)
+        simcc_y = simcc_y - np.max(simcc_y, axis=1, keepdims=True)
+        ex, ey = np.exp(simcc_x), np.exp(simcc_y)
+        simcc_x = ex / np.sum(ex, axis=1, keepdims=True)
+        simcc_y = ey / np.sum(ey, axis=1, keepdims=True)
+
     x_locs = np.argmax(simcc_x, axis=1)
     y_locs = np.argmax(simcc_y, axis=1)
     locs = np.stack((x_locs, y_locs), axis=-1).astype(np.float32)