
OOD Detection using COOD #493

Draft
wants to merge 32 commits into base: main
Changes from 1 commit
Commits
32 commits
d3f52d4
Initial Commit
rajeshgangireddy Jul 18, 2024
632233e
Copy of notebook 103
rajeshgangireddy Jul 22, 2024
edbce1b
Rename OODModel to COODModel
rajeshgangireddy Jul 22, 2024
d95c4d3
Add code for initial steps - fetch Deployment models,
rajeshgangireddy Jul 22, 2024
013a6c4
Create ood_debug_delete_before_pr.py
rajeshgangireddy Jul 22, 2024
50b24bb
fix logic to get xai model
rajeshgangireddy Jul 23, 2024
99ab08e
add method to download, infer the id data
rajeshgangireddy Jul 31, 2024
ac76588
Update ood_model.py
rajeshgangireddy Jul 31, 2024
f718269
added code to prepare id and ood data - draft
rajeshgangireddy Aug 1, 2024
d99ca98
Update ood_model.py
rajeshgangireddy Aug 2, 2024
23213e8
Update ood_model.py
rajeshgangireddy Aug 5, 2024
e5b5991
Update ood_model.py
rajeshgangireddy Aug 6, 2024
e5eb494
training and inference works.
rajeshgangireddy Aug 12, 2024
c5984d0
move the deployment getting method to utils
rajeshgangireddy Aug 13, 2024
2821ea3
change number of cutouts to be 2
rajeshgangireddy Aug 14, 2024
9b1ef26
Refactored OOD Model
rajeshgangireddy Aug 14, 2024
9885fbd
Abstract class for sub models
rajeshgangireddy Aug 14, 2024
34ecff3
change corruption parameters again
rajeshgangireddy Aug 14, 2024
22e5d0b
Update ood_debug_delete_before_pr.py
rajeshgangireddy Aug 14, 2024
bdd3822
add more ood measures
rajeshgangireddy Aug 14, 2024
85cd5ed
Cleanup and added docstrings
rajeshgangireddy Aug 15, 2024
500b8da
Initial commit for notebook.
rajeshgangireddy Aug 21, 2024
8a0117c
Update 104_post_inference_hook_ood.ipynb
rajeshgangireddy Sep 4, 2024
5990dd1
Possibility to provide an image dir as reference for OOD data
rajeshgangireddy Sep 6, 2024
edd7392
Add train and test split functionality
rajeshgangireddy Sep 6, 2024
c36234e
better listing of images
rajeshgangireddy Sep 6, 2024
8b0c98e
Updated FRE calculations, added new variant scores
rajeshgangireddy Sep 9, 2024
5fffa23
Determine best threshold for certain metrics.
rajeshgangireddy Sep 10, 2024
f97a151
WIP files for testing and notebook
rajeshgangireddy Sep 10, 2024
fe687c2
Refactor - Move functions into utils, move sub ood methods
rajeshgangireddy Sep 16, 2024
74e91cf
Demo Notebook
rajeshgangireddy Sep 20, 2024
8f2af0a
Delete ood_debug_delete_before_pr.py
rajeshgangireddy Sep 20, 2024
Add code for initial steps - fetch Deployment models,
rajeshgangireddy committed Oct 24, 2024
commit d95c4d3db6f8e3a3f1ee423031e86791ce5c830a
239 changes: 180 additions & 59 deletions geti_sdk/detect_ood/ood_model.py
@@ -12,61 +12,128 @@
# See the License for the specific language governing permissions
# and limitations under the License.

from typing import List
import logging
from typing import List, Union

import numpy as np
from sklearn.ensemble import RandomForestClassifier

from geti_sdk.data_models import Prediction
from geti_sdk import Geti
from geti_sdk.data_models import Prediction, Project
from geti_sdk.data_models.enums.task_type import TaskType
from geti_sdk.deployment import Deployment
from geti_sdk.rest_clients import ModelClient

from .utils import fit_pca_model, fre_score
from .utils import fit_pca_model, fre_score, perform_knn_indexing


class OODModel:
class COODModel:
"""
Out-of-distribution detection model.
Uses the Combined out-of-distribution (COOD) detection
Out-of-distribution detection model. Uses the Combined out-of-distribution (COOD) detection
algorithm (see : https://arxiv.org/abs/2403.06874).
"""

def __init__(self):
def __init__(
self,
geti: Geti,
project: Union[str, Project],
deployment: Deployment = None,
):
"""
Template
todo[ood] : fill the docstring properly
Combined Out-of-Distribution (COOD) detection model.
:param geti: Geti instance on which the project to use for OOD detection lives
:param project: Project or project name to use for OOD detection. The
project must exist on the specified Geti instance
:param deployment: Deployment to use for OOD detection. If not provided, a
deployment with an XAI-enabled model is created from the project's active models
"""
self.ood_classifier = None # The COOD random forest classifier
self.sub_models = None # A dict consisting submodels (FRE, EnWeDi, MSP, etc)
self.geti = geti
if isinstance(project, str):
project_name = project
self.project = geti.get_project(project_name=project_name)
else:
self.project = project

pass
logging.info(
f"Building Combined OOD detection model for Intel® Geti™ project `{self.project.name}`."
)

def __call__(self, prediction: Prediction) -> float:
"""
Return the COOD Score based using feature vector and prediction probabilities in "prediction".
"""
# feature_vector = prediction.feature_vector
# for annotation in prediction.annotations:
# # Better way to get probability (or logits_)
# prediction_probabilities = [
# label.probability for label in annotation.labels
# ]
#
tasks_in_project = self.project.get_trainable_tasks()
if len(tasks_in_project) != 1:
raise ValueError(
"Out-of-distribution detection models are only "
"supported for projects with a single task for now."
)
# get the task type and check if it is classification
task_type = tasks_in_project[0].task_type
if task_type != TaskType.CLASSIFICATION:
raise ValueError(
"Out-of-distribution detection models are only "
"supported for classification tasks for now."
)

cood_features = self.call_sub_models(prediction)
cood_score = self.ood_classifier.predict(cood_features)
return cood_score
self.model_client = ModelClient(
session=geti.session, workspace_id=geti.workspace_id, project=self.project
)

def call_sub_models(self, prediction: Prediction) -> np.ndarray:
"""
Call the sub-models to get the OOD scores
"""
# see paper at https://github.com/VitjanZ/DRAEM
# Call's all submodel objects. Gets back individual scores
pass
if deployment is None:
self.deployment = self._get_usable_deployment()
else:
if not deployment.models[0].has_xai_head:
raise ValueError(
"The provided deployment does not have an model with an XAI head."
"Please reconfigure the deployment to include a model with an XAI head "
"(OptimizedModel.has_xai_head must be True)"
)

def initialise_sub_models(self):
self.deployment = deployment

if not self.deployment.are_models_loaded:
self.deployment.load_inference_models(device="CPU")

# datasets_in_project = self.project.datasets

# The COOD random forest classifier
self.ood_classifier = None

# A dict consisting of smaller OOD models (FRE, EnWeDi, etc.)
self.sub_models = {
"knn_based": KNNBasedOODModel(knn_k=10),
"class_fre": ClassFREModel(n_components=0.995),
"max_softmax_probability": MaxSoftmaxProbabilityModel(),
}

def _get_usable_deployment(self) -> Deployment:
"""
Initialise all the sub-models (FRE, EnWeDi, etc). This is done before training the COOD model.
Get a deployment that has an optimised model with an XAI head.
"""
pass
# Check if there's at least one trained model in the project
models = self.model_client.get_all_active_models()
if len(models) == 0:
raise ValueError(
"No trained models were found in the project, please either "
"train a model first or specify an algorithm to train."
)

# We need the model which has xai enabled - this allows us to get the feature vector from the model.
model_with_xai_head = None

for model in models:
for optimised_model in model.optimized_models:
if optimised_model.has_xai_head:
model_with_xai_head = model
break

if model_with_xai_head is None:
raise ValueError(
"No trained model with an XAI head was found in the project, "
"please train a model with an XAI head first."
)

deployment = self.geti.deploy_project(
project_name=self.project.name, models=[model_with_xai_head]
)
return deployment

def train(self):
"""
@@ -79,17 +146,47 @@ def train(self):
# 2b : Else, generate images by applying corruptions
# Step 3 : Extract Features, and predictions
# 3a : Find a xai model
# Step 4 : Initialise/Index/Train all the sub-models (FRE,EnWeDi, etc)
# Step 4 : Train all the sub-models (FRE, EnWeDi, etc)
# Step 5 : Forward pass through sub-models to get ood scores for each image
# Step 6 : Train the COOD Random Forest
# Step 7 : Test COOD on test set (?) Determine threshold (usually this is just 0.5)

# Step 4 : Train all the sub models

for sub_model in self.sub_models.values():
sub_model.train()

ood_classifier = RandomForestClassifier()
features = [] # Each element is an output (ood score) from the sub-models
labels = [] # OOD = 1, ID = 0
ood_classifier.fit(features, labels)

self.ood_classifier = ood_classifier

def __call__(self, prediction: Prediction) -> float:
"""
Return the COOD score based on the feature vector and prediction probabilities in `prediction`.
"""
# feature_vector = prediction.feature_vector
# for annotation in prediction.annotations:
# # Find a Better way to get probability (or logits_)
# prediction_probabilities = [
# label.probability for label in annotation.labels
# ]
#

cood_features = self.call_sub_models(prediction)
cood_score = self.ood_classifier.predict(cood_features)
return cood_score

def call_sub_models(self, prediction: Prediction) -> np.ndarray:
"""
Call the sub-models to get the OOD scores
"""
# see paper at https://github.com/VitjanZ/DRAEM
# Calls all sub-model objects. Gets back individual scores
pass

def _get_labeled_id_images_from_project(self):
"""
Create a list of the images that will be ID
@@ -110,29 +207,37 @@ def _create_ood_images(self):
pass


class ClassFREModel:
class KNNBasedOODModel:
"""
Yet to be finalised
k Nearest Neighbour based OOD detection model.
"""

def __init__(self):
self.class_fre_models = None
self.n_components = 0.995
def __init__(self, knn_k: int = 10):
self.knn_k = knn_k
self.knn_search_index = None

def __call__(self, features: np.ndarray, prediction: np.ndarray) -> float:
def train(self):
"""
Return the class fre score for the given feature vector.
Train the kNN model
"""
fre_scores_per_class = {}
# class_fre_models is a dict with label name and pca model.
for label, pca_model in self.class_fre_models.items():
fre_scores_per_class[label] = fre_score(
representation_vectors=features,
pca_model=pca_model,
)
feature_vectors = None
self.knn_search_index = perform_knn_indexing(feature_vectors)

# return maximum FRE
return max(fre_scores_per_class.values())
def __call__(self, prediction: Prediction) -> dict:
"""
Return the kNN OOD score for the given feature vector.
"""
pass


class ClassFREModel:
"""
Yet to be finalised
"""

def __init__(self, n_components=0.995):
self.n_components = n_components
self.pca_models_per_class = {}

def train(self):
"""
@@ -146,12 +251,27 @@ def train(self):
for label in np.unique(labels):
# labels are list of class names and not indices
class_indices = [i for i, j in enumerate(labels) if j == label]
class_features = features[class_indices]
pca_models[label] = fit_pca_model(
train_features=features[class_indices],
n_components=0.995,
feature_vectors=class_features, n_components=self.n_components
)

self.pca_models_per_class = pca_models

def __call__(self, prediction: Prediction) -> dict:
"""
Return the maximum class FRE score for the given feature vector.
"""
features = prediction.feature_vector
fre_scores_per_class = {}
# pca_models_per_class is a dict mapping label name to PCA model.
for label, pca_model in self.pca_models_per_class.items():
fre_scores_per_class[label] = fre_score(
feature_vectors=features, pca_model=pca_model
)

self.class_fre_models = pca_models
# return maximum FRE
return {"max_fre": max(fre_scores_per_class.values())}


class MaxSoftmaxProbabilityModel:
@@ -165,10 +285,11 @@ def __init__(self):
pass


class EnWeDiModel:
class DKNNModel:
"""
Entropy Weighted Nearest Neighbour Model. Copy description from ILRF class
todo[ood] : Docstring if this class is actually used
"""

def __init__(self):
pass
# This will be called by KNNBasedOODModel.
# KNNBasedOODModel would have prepared the index. COOD or OODSubModel would have prepared the feature vectors.
pass
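
Reviewer note: the helpers imported from .utils (fit_pca_model, fre_score, perform_knn_indexing) are not shown in this diff, and the training and scoring paths are still stubbed in this draft, so the following is only a minimal usage sketch of the API introduced in this commit. The server address, token, project name, and image below are placeholders and not part of the PR; the flow is inferred from the COODModel signature and the train/__call__ methods above.

import numpy as np

from geti_sdk import Geti
from geti_sdk.detect_ood.ood_model import COODModel

# Placeholder server address and token (not from this PR).
geti = Geti(host="https://example-geti-server", token="<api-token>")

# Build the COOD model for a single-task classification project. If no
# deployment is passed, COODModel looks for an active model with an XAI
# head and deploys it via _get_usable_deployment().
cood_model = COODModel(geti=geti, project="example-classification-project")

# Intended flow: train the sub-models (kNN, class-FRE, max softmax
# probability), then the COOD random forest (labels: OOD = 1, ID = 0).
cood_model.train()

# Score a prediction; the XAI model populates prediction.feature_vector,
# which the sub-models consume. Higher scores indicate likely OOD inputs.
dummy_image = np.zeros((224, 224, 3), dtype=np.uint8)  # placeholder image
prediction = cood_model.deployment.infer(dummy_image)
cood_score = cood_model(prediction)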