OOD Detection using COOD #493

Draft
Wants to merge 32 commits into base: main
Changes from 1 commit (of 32 commits)
d3f52d4
Initial Commit
rajeshgangireddy Jul 18, 2024
632233e
Copy of notebook 103
rajeshgangireddy Jul 22, 2024
edbce1b
Rename OODModel to COODModel
rajeshgangireddy Jul 22, 2024
d95c4d3
Add code for initial steps - fetch Deployment models,
rajeshgangireddy Jul 22, 2024
013a6c4
Create ood_debug_delete_before_pr.py
rajeshgangireddy Jul 22, 2024
50b24bb
fix logic to get xai model
rajeshgangireddy Jul 23, 2024
99ab08e
add method to download, infer the id data
rajeshgangireddy Jul 31, 2024
ac76588
Update ood_model.py
rajeshgangireddy Jul 31, 2024
f718269
added code to prepare id and ood data - draft
rajeshgangireddy Aug 1, 2024
d99ca98
Update ood_model.py
rajeshgangireddy Aug 2, 2024
23213e8
Update ood_model.py
rajeshgangireddy Aug 5, 2024
e5b5991
Update ood_model.py
rajeshgangireddy Aug 6, 2024
e5eb494
training and inference works.
rajeshgangireddy Aug 12, 2024
c5984d0
move the deployment getting method to utils
rajeshgangireddy Aug 13, 2024
2821ea3
change number of cutouts to be 2
rajeshgangireddy Aug 14, 2024
9b1ef26
Refactored OOD Model
rajeshgangireddy Aug 14, 2024
9885fbd
Abstract class for sub models
rajeshgangireddy Aug 14, 2024
34ecff3
change corruption parameters again
rajeshgangireddy Aug 14, 2024
22e5d0b
Update ood_debug_delete_before_pr.py
rajeshgangireddy Aug 14, 2024
bdd3822
add more ood measures
rajeshgangireddy Aug 14, 2024
85cd5ed
Cleanup and added docstrings
rajeshgangireddy Aug 15, 2024
500b8da
Initial commit for notebook.
rajeshgangireddy Aug 21, 2024
8a0117c
Update 104_post_inference_hook_ood.ipynb
rajeshgangireddy Sep 4, 2024
5990dd1
Possibility to provide a image dir as reference for OOD data
rajeshgangireddy Sep 6, 2024
edd7392
Add train and test split functionality
rajeshgangireddy Sep 6, 2024
c36234e
better listing of images
rajeshgangireddy Sep 6, 2024
8b0c98e
Updated FRE calculations , added a new variant scores
rajeshgangireddy Sep 9, 2024
5fffa23
Determine best threshold for certain metrics.
rajeshgangireddy Sep 10, 2024
f97a151
WIP files for testing and notebook
rajeshgangireddy Sep 10, 2024
fe687c2
Refactor - Move functions into utils, move sub ood methods
rajeshgangireddy Sep 16, 2024
74e91cf
Demo Notebook
rajeshgangireddy Sep 20, 2024
8f2af0a
Delete ood_debug_delete_before_pr.py
rajeshgangireddy Sep 20, 2024
Initial Commit
rajeshgangireddy committed Oct 24, 2024
commit d3f52d403ca757c1211e69a66ced7468ceb0d8fb
37 changes: 37 additions & 0 deletions geti_sdk/detect_ood/__init__.py
@@ -0,0 +1,37 @@
# Copyright (C) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

"""
Introduction
------------

The "detect_ood" package contains the
:py:class:`~geti_sdk.detect_ood.OODModel` class, which provides
Out-of-distribution detection functions (training an OODModel as well as detecting OOD samples).

Primarily, it is used by the OODTrigger class (~geti_sdk.post_inference_hooks.triggers.ood_trigger.OODTrigger)
to detect out-of-distribution samples.

Module contents
---------------

.. automodule:: geti_sdk.detect_ood.ood_model
:members:
:undoc-members:
:show-inheritance:
"""

from .ood_model import OODModel

__all__ = ["OODModel"]
174 changes: 174 additions & 0 deletions geti_sdk/detect_ood/ood_model.py
@@ -0,0 +1,174 @@
# Copyright (C) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.

from typing import List

import numpy as np
from sklearn.ensemble import RandomForestClassifier

from geti_sdk.data_models import Prediction

from .utils import fit_pca_model, fre_score


class OODModel:
"""
Out-of-distribution detection model.
Uses the Combined out-of-distribution (COOD) detection
algorithm (see : https://arxiv.org/abs/2403.06874).
"""

def __init__(self):
"""
Initialise the COOD model with an (untrained) random-forest classifier and its sub-models.
"""
self.ood_classifier = None # The COOD random forest classifier
self.sub_models = None # A dict of sub-models (FRE, EnWeDi, MSP, etc.)

def __call__(self, prediction: Prediction) -> float:
"""
Return the COOD score based on the feature vector and prediction probabilities in `prediction`.
"""
# feature_vector = prediction.feature_vector
# for annotation in prediction.annotations:
# # Better way to get probability (or logits_)
# prediction_probabilities = [
# label.probability for label in annotation.labels
# ]
#

cood_features = self.call_sub_models(prediction)
cood_score = self.ood_classifier.predict(cood_features)
return cood_score

def call_sub_models(self, prediction: Prediction) -> np.ndarray:
"""
Call the sub-models to get the OOD scores
"""
# see paper at https://github.com/VitjanZ/DRAEM
# Calls all sub-model objects and collects their individual scores
pass

def initialise_sub_models(self):
"""
Initialise all the sub-models (FRE, EnWeDi, etc). This is done before training the COOD model.
"""
pass

def train(self):
"""
Train the COOD model using the RandomForestClassifier
"""
# Step 1 : ID Images
# 1a : Get labelled images from the project
# Step 2 : OOD Data
# 2a : Check if a dataset of OOD images already exists in the project
# 2b : Else, generate images by applying corruptions
# Step 3 : Extract features and predictions
# 3a : Find an XAI model (one that exposes feature vectors)
# Step 4 : Initialise/index/train all the sub-models (FRE, EnWeDi, etc.)
# Step 5 : Forward pass through sub-models to get ood scores for each image
# Step 6 : Train the COOD Random Forest
# Step 7 : Test COOD on test set (?) Determine threshold (usually this is just 0.5)
ood_classifier = RandomForestClassifier()
features = [] # Each element is an output (ood score) from the sub-models
labels = [] # OOD = 1, ID = 0
ood_classifier.fit(features, labels)

self.ood_classifier = ood_classifier

def _get_labeled_id_images_from_project(self):
"""
Create a list of the images that will be ID
"""
pass

def _get_ood_images_from_project(self):
"""
Create a list of the images that will be OOD
"""
pass

def _create_ood_images(self):
"""
Create near-OOD images by applying strong corruptions to the in-distribution images
"""
# Options : Applying corruptions, generating Perlin Noise Images, Background extraction
pass


class ClassFREModel:
"""
Class-conditional feature reconstruction error (FRE) model. Yet to be finalised.
"""

def __init__(self):
self.class_fre_models = None
self.n_components = 0.995

def __call__(self, features: np.ndarray, prediction: np.ndarray) -> float:
"""
Return the per-class FRE scores for the given feature vector and take the maximum.
"""
fre_scores_per_class = {}
# class_fre_models is a dict with label name and pca model.
for label, pca_model in self.class_fre_models.items():
fre_scores_per_class[label] = fre_score(
representation_vectors=features,
pca_model=pca_model,
)

# return maximum FRE
return max(fre_scores_per_class.values())

def train(self):
"""
Fit PCA Models on the in-distribution data for each class.
"""
# iterate through unique labels and fit pca model for each class
pca_models = {}
features: np.ndarray = None
labels: List[str] = None

for label in np.unique(labels):
# labels are list of class names and not indices
class_indices = [i for i, j in enumerate(labels) if j == label]
pca_models[label] = fit_pca_model(
representation_vectors=features[class_indices],
n_components=self.n_components,
)

self.class_fre_models = pca_models


class MaxSoftmaxProbabilityModel:
"""
Maximum Softmax Probability Model - A baseline OOD detection model.
Uses the observation that a lower maximum softmax probability indicates that the image may be OOD.
"""

def __init__(self):
pass


class EnWeDiModel:
"""
Entropy Weighted Nearest Neighbour (EnWeDi) model. TODO: copy description from the ILRF class.
"""

def __init__(self):
pass
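The train() skeleton above describes the COOD approach (sub-model scores become the features of a random forest) only in comments, so here is a self-contained sketch of the shape of that computation on synthetic data. The two toy score functions (a stand-in MSP score and an FRE-style score) and the synthetic features are purely illustrative, not the PR's implementation:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
id_feats = rng.normal(0.0, 1.0, size=(200, 16))   # stand-in in-distribution features
ood_feats = rng.normal(3.0, 2.0, size=(200, 16))  # stand-in (shifted) OOD features

def msp_score(probs):
    # Maximum softmax probability: lower values suggest OOD
    return probs.max(axis=1)

def toy_fre(feats, pca):
    # Feature reconstruction error, mirroring utils.fre_score in this PR
    recon = pca.inverse_transform(pca.transform(feats))
    return np.sum(np.square(feats - recon), axis=1)

pca = PCA(n_components=0.95).fit(id_feats)              # PCA sub-model fitted on ID data only
probs_id = rng.dirichlet(np.ones(5) * 5.0, size=200)    # toy softmax outputs (more confident)
probs_ood = rng.dirichlet(np.ones(5) * 1.0, size=200)   # toy softmax outputs (less confident)

# Each image is described by its vector of sub-model scores (the "COOD features")
x = np.concatenate([
    np.column_stack([msp_score(probs_id), toy_fre(id_feats, pca)]),
    np.column_stack([msp_score(probs_ood), toy_fre(ood_feats, pca)]),
])
y = np.concatenate([np.zeros(200), np.ones(200)])       # ID = 0, OOD = 1

cood = RandomForestClassifier(random_state=0).fit(x, y)
print(cood.predict_proba(x[:3])[:, 1])                  # per-image COOD score in [0, 1]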
73 changes: 73 additions & 0 deletions geti_sdk/detect_ood/utils.py
@@ -0,0 +1,73 @@
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.


import numpy as np
from sklearn.decomposition import PCA

from geti_sdk.deployment import Deployment
from geti_sdk.http_session import GetiSession
from geti_sdk.rest_clients import ImageClient


def fit_pca_model(representation_vectors: np.ndarray, n_components: float = 0.995) -> PCA:
"""
Fit a Principal Component Analysis (PCA) model to the features and return the fitted model
:param representation_vectors: Train set features to fit the PCA model
:param n_components: Number of components (fraction of variance) to keep
"""
pca_model = PCA(n_components)
pca_model.fit(representation_vectors)
return pca_model


def fre_score(representation_vectors: np.ndarray, pca_model: PCA) -> np.ndarray:
"""
Return the feature reconstruction error (FRE) score for a given feature vector(s)
:param representation_vectors: feature vectors to compute the FRE score
:param pca_model: PCA model to use for computing the FRE score. PCA model must be fitted already
"""
features_original = representation_vectors
features_transformed = pca_model.transform(representation_vectors)
features_reconstructed = pca_model.inverse_transform(features_transformed)
fre_scores = np.sum(np.square(features_original - features_reconstructed), axis=1)
return fre_scores


def extract_features_from_imageclient(
deployment: Deployment,
image_client: ImageClient,
geti_session: GetiSession,
n_images: int = -1,
normalise_feats: bool = True,
):
"""
Extract feature vectors and predictions for images retrieved via the image_client, using the given deployment.
"""
pass


def generate_ood_dataset_by_corruption(
geti_deployment: Deployment,
source_path: str,
corruption_type: str,
dest_path: str = None,
desired_accuracy: float = 50,
desired_accuracy_tol=3.0,
show_progress: bool = True,
) -> str:
"""
Generate a near-OOD dataset by applying corruption_type corruptions to the images in source_path,
targeting roughly desired_accuracy (within desired_accuracy_tol) model accuracy on the corrupted
images. Returns the path to the generated dataset.
"""
pass
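As a quick sanity check of the PCA/FRE helpers above, the following snippet (assuming this branch of geti_sdk is installed so that geti_sdk.detect_ood.utils is importable) fits a PCA model on synthetic in-distribution features that lie near an 8-dimensional subspace and shows that features leaving that subspace receive a much larger reconstruction error:

import numpy as np
from geti_sdk.detect_ood.utils import fit_pca_model, fre_score

rng = np.random.default_rng(42)

# Synthetic ID features lying (almost) on an 8-dimensional subspace of a 64-d feature space
basis = rng.normal(size=(8, 64))
id_features = rng.normal(size=(500, 8)) @ basis + 0.01 * rng.normal(size=(500, 64))

# Synthetic "OOD" features that do not respect that subspace
ood_features = rng.normal(size=(50, 64))

pca_model = fit_pca_model(representation_vectors=id_features, n_components=0.995)

id_scores = fre_score(representation_vectors=id_features[:50], pca_model=pca_model)
ood_scores = fre_score(representation_vectors=ood_features, pca_model=pca_model)

print(f"mean FRE (ID):  {id_scores.mean():.4f}")   # close to zero
print(f"mean FRE (OOD): {ood_scores.mean():.4f}")  # substantially larger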
Binary file added geti_sdk/post_inference_hooks/.DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions geti_sdk/post_inference_hooks/__init__.py
@@ -23,6 +23,7 @@
EmptyLabelTrigger,
LabelTrigger,
ObjectCountTrigger,
OODTrigger,
)

__all__ = [
@@ -32,6 +33,7 @@
"EmptyLabelTrigger",
"ObjectCountTrigger",
"AlwaysTrigger",
"OODTrigger",
"GetiDataCollection",
"FileSystemDataCollection",
"HttpRequestAction",
2 changes: 2 additions & 0 deletions geti_sdk/post_inference_hooks/triggers/__init__.py
@@ -17,11 +17,13 @@
from .empty_label_trigger import EmptyLabelTrigger
from .label_trigger import LabelTrigger
from .object_count_trigger import ObjectCountTrigger
from .ood_trigger import OODTrigger

__all__ = [
"AlwaysTrigger",
"ConfidenceTrigger",
"LabelTrigger",
"EmptyLabelTrigger",
"ObjectCountTrigger",
"OODTrigger",
]
59 changes: 59 additions & 0 deletions geti_sdk/post_inference_hooks/triggers/ood_trigger.py
@@ -0,0 +1,59 @@
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.


import numpy as np

from geti_sdk.data_models import Prediction
from geti_sdk.deployment.inference_hook_interfaces import PostInferenceTrigger
from geti_sdk.detect_ood import OODModel


class OODTrigger(PostInferenceTrigger):
"""
Post inference trigger based on the out-of-distribution (OOD) detection score for an image.
The OODModel already provides a suitable threshold to determine whether an image is OOD. If the OOD score
is **higher** than the defined `threshold`, the trigger is activated.

:param ood_model: OODModel object that calculates the OOD score for an image
:param threshold: Threshold above which an image is considered out-of-distribution. Defaults to 0.5
"""

def __init__(self, ood_model: OODModel, threshold: float = 0.5):
super().__init__(threshold=threshold)
self.ood_model = ood_model

self._repr_info_ += ""

def __call__(self, image: np.ndarray, prediction: Prediction) -> float:
"""
Compute an OOD score for the `image`, using the corresponding information (feature vector, prediction
probabilities) from `prediction`.

:param image: Numpy array representing an image
:param prediction: Prediction object corresponding to the inference result for the image.
:return: Float representing the score for the input
"""
cood_score = self.ood_model(prediction=prediction)
return cood_score

def get_decision(self, score: float) -> bool:
"""
Make a decision to classify the sample into "in-distribution" or "out-of-distribution" based on
the OOD score and threshold set for trigger.

:param score: Float representing the OOD-score for an image.
:return: Boolean indicating whether the trigger conditions are met (True,
score is **higher** than the threshold), or not (False)
"""
return score > self.threshold
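Finally, to show how the new trigger is meant to be used end to end, here is a hedged sketch of attaching it to a deployment as a post-inference hook. The PostInferenceHook, FileSystemDataCollection, Deployment.from_folder and add_post_inference_hook signatures shown are assumptions based on the existing geti_sdk hook API and are not part of this diff; several OODModel methods are still stubs in this commit, so the sketch is illustrative only.

import cv2

from geti_sdk.deployment import Deployment
from geti_sdk.detect_ood import OODModel
from geti_sdk.post_inference_hooks import (
    FileSystemDataCollection,  # assumed constructor arguments below
    OODTrigger,
    PostInferenceHook,         # assumed to be exported here, like the other hook classes
)

deployment = Deployment.from_folder("deployment")   # assumed: an exported Geti deployment folder
deployment.load_inference_models(device="CPU")

ood_model = OODModel()
ood_model.train()  # fit the COOD random forest (steps outlined in ood_model.py above)

trigger = OODTrigger(ood_model=ood_model, threshold=0.5)
action = FileSystemDataCollection(target_folder="ood_samples")  # assumed keyword argument
hook = PostInferenceHook(trigger=trigger, action=action)        # assumed keyword arguments

deployment.add_post_inference_hook(hook)  # assumed method on Deployment

image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
prediction = deployment.infer(image)  # the hook saves the image when its COOD score exceeds 0.5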