-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathood_model.py
295 lines (244 loc) · 9.98 KB
/
ood_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# Copyright (C) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
import logging
from typing import List, Union
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from geti_sdk import Geti
from geti_sdk.data_models import Prediction, Project
from geti_sdk.data_models.enums.task_type import TaskType
from geti_sdk.deployment import Deployment
from geti_sdk.rest_clients import ModelClient
from .utils import fit_pca_model, fre_score, perform_knn_indexing
class COODModel:
    """
    Out-of-distribution detection model. Uses the Combined out-of-distribution (COOD) detection
    algorithm (see : https://arxiv.org/abs/2403.06874).

    The model holds a set of smaller OOD sub-models (`self.sub_models`) and a random
    forest classifier (`self.ood_classifier`) that is fitted in `train`.
    """

    def __init__(
        self,
        geti: Geti,
        project: Union[str, Project],
        deployment: Union[Deployment, None] = None,
    ):
        """
        Combined Out-of-Distribution (COOD) detection model.

        :param geti: Geti instance on which the project to use lives
        :param project: Project or project name to build the OOD model for. When a
            name is given, the project is looked up on the specified Geti instance
        :param deployment: Deployment to use for OOD detection. Its first model must
            have an XAI head. If not provided, a deployment is created from an
            active model of the project that has an XAI head
        :raises ValueError: If the project does not have exactly one trainable task,
            if that task is not a classification task, or if the provided deployment
            does not have a model with an XAI head
        """
        self.geti = geti
        if isinstance(project, str):
            project_name = project
            self.project = geti.get_project(project_name=project_name)
        else:
            self.project = project

        logging.info(
            f"Building Combined OOD detection model for Intel® Geti™ project `{self.project.name}`."
        )

        tasks_in_project = self.project.get_trainable_tasks()
        if len(tasks_in_project) != 1:
            raise ValueError(
                "Out-of-distribution detection models are only "
                "supported for projects with a single task for now."
            )

        # get the task type and check if it is classification
        task_type = tasks_in_project[0].task_type
        if task_type != TaskType.CLASSIFICATION:
            raise ValueError(
                "Out-of-distribution detection models are only "
                "supported for classification tasks for now."
            )

        self.model_client = ModelClient(
            session=geti.session, workspace_id=geti.workspace_id, project=self.project
        )

        if deployment is None:
            self.deployment = self._get_usable_deployment()
        else:
            if not deployment.models[0].has_xai_head:
                raise ValueError(
                    "The provided deployment does not have a model with an XAI head. "
                    "Please reconfigure the deployment to include a model with an XAI head "
                    "(OptimizedModel.has_xai_head must be True)"
                )
            self.deployment = deployment

        if not self.deployment.are_models_loaded:
            self.deployment.load_inference_models(device="CPU")

        # datasets_in_project = self.project.datasets

        # The COOD random forest classifier, set by `train`
        self.ood_classifier = None

        # A dict consisting smaller OOD models (FRE, EnWeDi, etc)
        self.sub_models = {
            "knn_based": KNNBasedOODModel(knn_k=10),
            "class_fre": ClassFREModel(n_components=0.995),
            "max_softmax_probability": MaxSoftmaxProbabilityModel(),
        }

    def _get_usable_deployment(self) -> Deployment:
        """
        Get a deployment that has an optimised model with an XAI head.

        :return: Deployment created from the first active model in the project that
            has at least one optimised model with an XAI head
        :raises ValueError: If the project has no active trained models, or none of
            them has an optimised model with an XAI head
        """
        # Check if there's at least one trained model in the project
        models = self.model_client.get_all_active_models()
        if not models:
            raise ValueError(
                "No trained models were found in the project, please either "
                "train a model first or specify an algorithm to train."
            )

        # We need the model which has xai enabled - this allows us to get the feature vector from the model.
        # Stop at the first match. Entries that are None are skipped.
        # NOTE(review): presumably the client can return None for a task without a
        # trained model - confirm against ModelClient.get_all_active_models.
        model_with_xai_head = next(
            (
                model
                for model in models
                if model is not None
                and any(
                    optimised_model.has_xai_head
                    for optimised_model in model.optimized_models
                )
            ),
            None,
        )

        if model_with_xai_head is None:
            raise ValueError(
                "No trained model with an XAI head was found in the project, "
                "please train a model with an XAI head first."
            )

        deployment = self.geti.deploy_project(
            project_name=self.project.name, models=[model_with_xai_head]
        )
        return deployment

    def train(self):
        """
        Train the COOD model using the RandomForestClassifier

        Calls `train` on every sub-model, then fits a new RandomForestClassifier and
        stores it as `self.ood_classifier`.
        """
        # Step 1 : ID Images
        #   1a : Get labelled images from the project
        # Step 2 : OOD Data
        #   2a : Check if any dataset called as OOD images exist
        #   2b : Else, generate images by applying corruptions
        # Step 3 : Extract Features, and predictions
        #   3a : Find a xai model
        # Step 4 : Train all the sub-models (FRE,EnWeDi, etc)
        # Step 5 : Forward pass through sub-models to get ood scores for each image
        # Step 6 : Train the COOD Random Forest
        # Step 7 : Test COOD on test set (?) Determine threshold (usually this is just 0.5)

        # Step 4 : Train all the sub models
        for sub_model in self.sub_models.values():
            sub_model.train()

        ood_classifier = RandomForestClassifier()
        # NOTE(review): features and labels are still empty placeholders (steps 1-5
        # above are not implemented yet), so the fit call below is not expected to
        # succeed until they are populated.
        features = []  # Each element is an output (ood score) from the sub-models
        labels = []  # OOD = 1, ID = 0
        ood_classifier.fit(features, labels)

        self.ood_classifier = ood_classifier

    def __call__(self, prediction: Prediction) -> float:
        """
        Return the COOD Score based using feature vector and prediction probabilities in "prediction".

        :param prediction: Prediction that is passed on to the sub-models
        :return: Output of the COOD classifier's `predict` for the sub-model scores
        :raises ValueError: If the COOD classifier has not been trained yet
        """
        if self.ood_classifier is None:
            # Fail with an explicit message instead of an AttributeError on None
            raise ValueError(
                "The COOD classifier has not been trained yet, "
                "please call `train` before requesting OOD scores."
            )

        # feature_vector = prediction.feature_vector
        # for annotation in prediction.annotations:
        #     # Find a Better way to get probability (or logits_)
        #     prediction_probabilities = [
        #         label.probability for label in annotation.labels
        #     ]
        #

        cood_features = self.call_sub_models(prediction)
        # NOTE(review): RandomForestClassifier.predict returns class labels rather
        # than a score; `predict_proba` may be what is intended here - confirm.
        cood_score = self.ood_classifier.predict(cood_features)
        return cood_score

    def call_sub_models(self, prediction: Prediction) -> np.ndarray:
        """
        Call the sub-models to get the OOD scores

        Not implemented yet: currently returns None.
        """
        # see paper at https://github.com/VitjanZ/DRAEM
        # Call's all submodel objects. Gets back individual scores
        pass

    def _get_labeled_id_images_from_project(self):
        """
        Create a list of the images that will be ID

        Not implemented yet: currently returns None.
        """
        pass

    def _get_ood_images_from_project(self):
        """
        Create a list of the images that will be OOD

        Not implemented yet: currently returns None.
        """
        pass

    def _create_ood_images(self):
        """
        Create near-OOD images by applying strong corruptions to the in-distribution images

        Not implemented yet: currently returns None.
        """
        # Options : Applying corruptions, generating Perlin Noise Images, Background extraction
        pass
class KNNBasedOODModel:
    """
    OOD detection sub-model backed by a k nearest neighbour search index.
    """

    def __init__(self, knn_k: int = 10):
        """
        :param knn_k: Value stored as `self.knn_k`; presumably the number of
            neighbours to consider (not used anywhere in this class yet)
        """
        # The search index only exists after `train` has been called
        self.knn_search_index = None
        self.knn_k = knn_k

    def train(self):
        """
        Build the kNN search index and keep it on the instance.
        """
        # todo[ood] : placeholder, no feature vectors are collected yet
        vectors_to_index = None
        search_index = perform_knn_indexing(vectors_to_index)
        self.knn_search_index = search_index

    def __call__(self, prediction: Prediction) -> dict:
        """
        Return the kNN OOD score for the given prediction.

        Not implemented yet: currently returns None.
        """
        return None
class ClassFREModel:
    """
    Class-wise FRE based OOD detection sub-model.

    One PCA model is fitted per class label in `train`. When called, the FRE score of
    the prediction's feature vector is computed against every per-class PCA model and
    the maximum score is returned.

    todo[ood] : the design of this sub-model is yet to be finalised
    """

    def __init__(self, n_components=0.995):
        """
        :param n_components: Passed as `n_components` to `fit_pca_model` for every
            class
        """
        self.n_components = n_components
        # Maps a class label to the PCA model fitted on that class' features
        self.pca_models_per_class = {}

    def train(self):
        """
        Fit PCA Models on the in-distribution data for each class.
        """
        # iterate through unique labels and fit pca model for each class
        pca_models = {}
        # todo[ood] : placeholders, the features and labels are not collected yet
        features: np.ndarray = None
        labels: List[str] = None
        for label in np.unique(labels):
            # labels are list of class names and not indices
            class_indices = [
                index
                for index, sample_label in enumerate(labels)
                if sample_label == label
            ]
            class_features = features[class_indices]
            pca_models[label] = fit_pca_model(
                feature_vectors=class_features, n_components=self.n_components
            )

        self.pca_models_per_class = pca_models

    def __call__(self, prediction: Prediction) -> dict:
        """
        Return the class fre score for the given feature vector.

        :param prediction: Prediction whose `feature_vector` is scored
        :return: Dictionary with the key "max_fre", holding the largest FRE score
            over all per-class PCA models
        :raises ValueError: If no per-class PCA models are available
        """
        if not self.pca_models_per_class:
            # Without this guard max() would raise a less descriptive ValueError
            raise ValueError(
                "No per-class PCA models are available, "
                "please call `train` before requesting FRE scores."
            )

        features = prediction.feature_vector
        # pca_models_per_class is a dict with label name and pca model.
        fre_scores_per_class = {
            label: fre_score(feature_vectors=features, pca_model=pca_model)
            for label, pca_model in self.pca_models_per_class.items()
        }

        # return maximum FRE
        return {"max_fre": max(fre_scores_per_class.values())}
class MaxSoftmaxProbabilityModel:
    """
    Maximum Softmax Probability Model - A baseline OOD detection model.
    Use the concept that a lower maximum softmax probability indicates that the image could be OOD.

    todo[ood] : add the literature reference for this baseline
    """

    def __init__(self):
        """
        This model holds no state.
        """
        pass

    def train(self):
        """
        Do nothing.

        Provided so that this model exposes the same `train` method as the other
        OOD sub-models and can be trained in the same loop.
        """
        return None
class DKNNModel:
    """
    Empty placeholder class, no behaviour is implemented yet.

    todo[ood] : Docstring if this class is actually used
    """

    # Intended to be invoked by KNNBasedModel: KnnBasedModel is expected to have
    # built the index already, and COOD or OODSubModel to have prepared the
    # feature vectors.