
OOD Detection using COOD #493

Draft
wants to merge 32 commits into base: main
Changes from 1 commit
Commits
32 commits
d3f52d4
Initial Commit
rajeshgangireddy Jul 18, 2024
632233e
Copy of notebook 103
rajeshgangireddy Jul 22, 2024
edbce1b
Rename OODModel to COODModel
rajeshgangireddy Jul 22, 2024
d95c4d3
Add code for initial steps - fetch Deployment models,
rajeshgangireddy Jul 22, 2024
013a6c4
Create ood_debug_delete_before_pr.py
rajeshgangireddy Jul 22, 2024
50b24bb
fix logic to get xai model
rajeshgangireddy Jul 23, 2024
99ab08e
add method to download, infer the id data
rajeshgangireddy Jul 31, 2024
ac76588
Update ood_model.py
rajeshgangireddy Jul 31, 2024
f718269
added code to prepare id and ood data - draft
rajeshgangireddy Aug 1, 2024
d99ca98
Update ood_model.py
rajeshgangireddy Aug 2, 2024
23213e8
Update ood_model.py
rajeshgangireddy Aug 5, 2024
e5b5991
Update ood_model.py
rajeshgangireddy Aug 6, 2024
e5eb494
training and inference works.
rajeshgangireddy Aug 12, 2024
c5984d0
move the deployment getting method to utils
rajeshgangireddy Aug 13, 2024
2821ea3
change number of cutouts to be 2
rajeshgangireddy Aug 14, 2024
9b1ef26
Refactored OOD Model
rajeshgangireddy Aug 14, 2024
9885fbd
Abstract class for sub models
rajeshgangireddy Aug 14, 2024
34ecff3
change corruption parameters again
rajeshgangireddy Aug 14, 2024
22e5d0b
Update ood_debug_delete_before_pr.py
rajeshgangireddy Aug 14, 2024
bdd3822
add more ood measures
rajeshgangireddy Aug 14, 2024
85cd5ed
Cleanup and added docstrings
rajeshgangireddy Aug 15, 2024
500b8da
Initial commit for notebook.
rajeshgangireddy Aug 21, 2024
8a0117c
Update 104_post_inference_hook_ood.ipynb
rajeshgangireddy Sep 4, 2024
5990dd1
Possibility to provide an image dir as reference for OOD data
rajeshgangireddy Sep 6, 2024
edd7392
Add train and test split functionality
rajeshgangireddy Sep 6, 2024
c36234e
better listing of images
rajeshgangireddy Sep 6, 2024
8b0c98e
Updated FRE calculations, added new variant scores
rajeshgangireddy Sep 9, 2024
5fffa23
Determine best threshold for certain metrics.
rajeshgangireddy Sep 10, 2024
f97a151
WIP files for testing and notebook
rajeshgangireddy Sep 10, 2024
fe687c2
Refactor - Move functions into utils, move sub ood methods
rajeshgangireddy Sep 16, 2024
74e91cf
Demo Notebook
rajeshgangireddy Sep 20, 2024
8f2af0a
Delete ood_debug_delete_before_pr.py
rajeshgangireddy Sep 20, 2024
Add code for initial steps - fetch Deployment models,
rajeshgangireddy committed Oct 24, 2024
commit d95c4d3db6f8e3a3f1ee423031e86791ce5c830a
239 changes: 180 additions & 59 deletions geti_sdk/detect_ood/ood_model.py
@@ -12,61 +12,128 @@
# See the License for the specific language governing permissions
# and limitations under the License.

from typing import List
import logging
from typing import List, Union

import numpy as np
from sklearn.ensemble import RandomForestClassifier

from geti_sdk.data_models import Prediction
from geti_sdk import Geti
from geti_sdk.data_models import Prediction, Project
from geti_sdk.data_models.enums.task_type import TaskType
from geti_sdk.deployment import Deployment
from geti_sdk.rest_clients import ModelClient

from .utils import fit_pca_model, fre_score
from .utils import fit_pca_model, fre_score, perform_knn_indexing


class OODModel:
class COODModel:
"""
Out-of-distribution detection model.
Uses the Combined out-of-distribution (COOD) detection
Out-of-distribution detection model. Uses the Combined out-of-distribution (COOD) detection
algorithm (see : https://arxiv.org/abs/2403.06874).
"""

def __init__(self):
def __init__(
self,
geti: Geti,
project: Union[str, Project],
deployment: Deployment = None,
):
"""
Template
todo[ood] : fill the docstring properly
Combined Out-of-Distribution (COOD) detection model.
:param geti: Geti instance on which the project to use for OOD detection lives
:param project: Project or project name to use for OOD detection. The
project must exist on the specified Geti instance
:param deployment: Deployment to use for OOD detection. If not provided, a
deployment with an XAI-enabled model is created from the project's active models
"""
self.ood_classifier = None # The COOD random forest classifier
self.sub_models = None # A dict consisting submodels (FRE, EnWeDi, MSP, etc)
self.geti = geti
if isinstance(project, str):
project_name = project
self.project = geti.get_project(project_name=project_name)
else:
self.project = project

pass
logging.info(
f"Building Combined OOD detection model for Intel® Geti™ project `{self.project.name}`."
)

def __call__(self, prediction: Prediction) -> float:
"""
Return the COOD Score based using feature vector and prediction probabilities in "prediction".
"""
# feature_vector = prediction.feature_vector
# for annotation in prediction.annotations:
# # Better way to get probability (or logits_)
# prediction_probabilities = [
# label.probability for label in annotation.labels
# ]
#
tasks_in_project = self.project.get_trainable_tasks()
if len(tasks_in_project) != 1:
raise ValueError(
"Out-of-distribution detection models are only "
"supported for projects with a single task for now."
)
# get the task type and check if it is classification
task_type = tasks_in_project[0].task_type
if task_type != TaskType.CLASSIFICATION:
raise ValueError(
"Out-of-distribution detection models are only "
"supported for classification tasks for now."
)

cood_features = self.call_sub_models(prediction)
cood_score = self.ood_classifier.predict(cood_features)
return cood_score
self.model_client = ModelClient(
session=geti.session, workspace_id=geti.workspace_id, project=self.project
)

def call_sub_models(self, prediction: Prediction) -> np.ndarray:
"""
Call the sub-models to get the OOD scores
"""
# see paper at https://github.com/VitjanZ/DRAEM
# Call's all submodel objects. Gets back individual scores
pass
if deployment is None:
self.deployment = self._get_usable_deployment()
else:
if not deployment.models[0].has_xai_head:
raise ValueError(
"The provided deployment does not have an model with an XAI head."
"Please reconfigure the deployment to include a model with an XAI head "
"(OptimizedModel.has_xai_head must be True)"
)

def initialise_sub_models(self):
self.deployment = deployment

if not self.deployment.are_models_loaded:
self.deployment.load_inference_models(device="CPU")

# datasets_in_project = self.project.datasets

# The COOD random forest classifier
self.ood_classifier = None

# A dict consisting of smaller OOD models (FRE, EnWeDi, etc.)
self.sub_models = {
"knn_based": KNNBasedOODModel(knn_k=10),
"class_fre": ClassFREModel(n_components=0.995),
"max_softmax_probability": MaxSoftmaxProbabilityModel(),
}

def _get_usable_deployment(self) -> Deployment:
"""
Initialise all the sub-models (FRE, EnWeDi, etc). This is done before training the COOD model.
Get a deployment that has an optimised model with an XAI head.
"""
pass
# Check if there's at least one trained model in the project
models = self.model_client.get_all_active_models()
if len(models) == 0:
raise ValueError(
"No trained models were found in the project, please either "
"train a model first or specify an algorithm to train."
)

# We need the model which has xai enabled - this allows us to get the feature vector from the model.
model_with_xai_head = None

for model in models:
for optimised_model in model.optimized_models:
if optimised_model.has_xai_head:
model_with_xai_head = model
break

if model_with_xai_head is None:
raise ValueError(
"No trained model with an XAI head was found in the project, "
"please train a model with an XAI head first."
)

deployment = self.geti.deploy_project(
project_name=self.project.name, models=[model_with_xai_head]
)
return deployment

def train(self):
"""
@@ -79,17 +146,47 @@ def train(self):
# 2b : Else, generate images by applying corruptions
# Step 3 : Extract Features, and predictions
# 3a : Find a xai model
# Step 4 : Initialise/Index/Train all the sub-models (FRE,EnWeDi, etc)
# Step 4 : Train all the sub-models (FRE, EnWeDi, etc)
# Step 5 : Forward pass through sub-models to get ood scores for each image
# Step 6 : Train the COOD Random Forest
# Step 7 : Test COOD on test set (?) Determine threshold (usually this is just 0.5)

# Step 4 : Train all the sub models

for sub_model in self.sub_models.values():
sub_model.train()

ood_classifier = RandomForestClassifier()
features = [] # Each element is an output (ood score) from the sub-models
labels = [] # OOD = 1, ID = 0
ood_classifier.fit(features, labels)

self.ood_classifier = ood_classifier

def __call__(self, prediction: Prediction) -> float:
"""
Return the COOD score based on the feature vector and prediction probabilities in `prediction`.
"""
# feature_vector = prediction.feature_vector
# for annotation in prediction.annotations:
# # Find a Better way to get probability (or logits_)
# prediction_probabilities = [
# label.probability for label in annotation.labels
# ]
#

cood_features = self.call_sub_models(prediction)
cood_score = self.ood_classifier.predict(cood_features)
return cood_score

def call_sub_models(self, prediction: Prediction) -> np.ndarray:
"""
Call the sub-models to get the OOD scores
"""
# see paper at https://github.com/VitjanZ/DRAEM
# Calls all sub-model objects. Gets back individual scores
pass

def _get_labeled_id_images_from_project(self):
"""
Create a list of the images that will be ID
@@ -110,29 +207,37 @@ def _create_ood_images(self):
pass


class ClassFREModel:
class KNNBasedOODModel:
"""
Yet to be finalised
k Nearest Neighbour based OOD detection model.
"""

def __init__(self):
self.class_fre_models = None
self.n_components = 0.995
def __init__(self, knn_k: int = 10):
self.knn_k = knn_k
self.knn_search_index = None

def __call__(self, features: np.ndarray, prediction: np.ndarray) -> float:
def train(self):
"""
Return the class fre score for the given feature vector.
Train the kNN model
"""
fre_scores_per_class = {}
# class_fre_models is a dict with label name and pca model.
for label, pca_model in self.class_fre_models.items():
fre_scores_per_class[label] = fre_score(
representation_vectors=features,
pca_model=pca_model,
)
feature_vectors = None
self.knn_search_index = perform_knn_indexing(feature_vectors)

# return maximum FRE
return max(fre_scores_per_class.values())
def __call__(self, prediction: Prediction) -> dict:
"""
Return the kNN OOD score for the given feature vector.
"""
pass


class ClassFREModel:
"""
Yet to be finalised
"""

def __init__(self, n_components=0.995):
self.n_components = n_components
self.pca_models_per_class = {}

def train(self):
"""
@@ -146,12 +251,27 @@ def train(self):
for label in np.unique(labels):
# labels are list of class names and not indices
class_indices = [i for i, j in enumerate(labels) if j == label]
class_features = features[class_indices]
pca_models[label] = fit_pca_model(
train_features=features[class_indices],
n_components=0.995,
feature_vectors=class_features, n_components=self.n_components
)

self.pca_models_per_class = pca_models

def __call__(self, prediction: Prediction) -> dict:
"""
Return the maximum class FRE score for the given feature vector.
"""
features = prediction.feature_vector
fre_scores_per_class = {}
# pca_models_per_class is a dict mapping label name to PCA model.
for label, pca_model in self.pca_models_per_class.items():
fre_scores_per_class[label] = fre_score(
feature_vectors=features, pca_model=pca_model
)

self.class_fre_models = pca_models
# return maximum FRE
return {"max_fre": max(fre_scores_per_class.values())}


class MaxSoftmaxProbabilityModel:
@@ -165,10 +285,11 @@ def __init__(self):
pass


class EnWeDiModel:
class DKNNModel:
"""
Entropy Weighted Nearest Neighbour Model. Copy description from ILRF class
todo[ood] : Docstring if this class is actually used
"""

def __init__(self):
pass
# This will be called by KNNBasedOODModel.
# KNNBasedOODModel would have prepared the index. COOD or OODSubModel would have prepared the feature vectors.
pass
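
Reviewer note: the helpers imported from .utils (fit_pca_model, fre_score, perform_knn_indexing) are not shown in this diff, and the training and scoring paths are still stubbed in this draft, so the following is only a minimal usage sketch of the API introduced in this commit. The server address, token, project name, and image below are placeholders and not part of the PR; the flow is inferred from the COODModel signature and the train/__call__ methods above.

import numpy as np

from geti_sdk import Geti
from geti_sdk.detect_ood.ood_model import COODModel

# Placeholder server address and token (not from this PR).
geti = Geti(host="https://example-geti-server", token="<api-token>")

# Build the COOD model for a single-task classification project. If no
# deployment is passed, COODModel looks for an active model with an XAI
# head and deploys it via _get_usable_deployment().
cood_model = COODModel(geti=geti, project="example-classification-project")

# Intended flow: train the sub-models (kNN, class-FRE, max softmax
# probability), then the COOD random forest (labels: OOD = 1, ID = 0).
cood_model.train()

# Score a prediction; the XAI model populates prediction.feature_vector,
# which the sub-models consume. Higher scores indicate likely OOD inputs.
dummy_image = np.zeros((224, 224, 3), dtype=np.uint8)  # placeholder image
prediction = cood_model.deployment.infer(dummy_image)
cood_score = cood_model(prediction)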