Skip to content

Commit 5b982c4

Browse files
authored
Fix PR opensearch-project#2976 bug due to missing adding function_name and algorithm in querying models (opensearch-project#3104)
* Fix PR opensearch-project#2976 bug due to missing adding function_name and algorithm in query models Signed-off-by: zane-neo <zaniu@amazon.com> * format code Signed-off-by: zane-neo <zaniu@amazon.com> * Move the setup method body to test method body Signed-off-by: zane-neo <zaniu@amazon.com> --------- Signed-off-by: zane-neo <zaniu@amazon.com>
1 parent 0f7481e commit 5b982c4

File tree

3 files changed

+95
-2
lines changed

3 files changed

+95
-2
lines changed

plugin/src/main/java/org/opensearch/ml/autoredeploy/MLModelAutoReDeployer.java

+16-2
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ private void triggerUndeployModelsOnDataNodes(List<String> dataNodeIds) {
217217
client.execute(MLUndeployModelAction.INSTANCE, undeployModelNodesRequest, undeployModelListener);
218218
}
219219
}
220-
}, e -> { log.error("Failed to query need undeploy models, no action will be performed"); });
220+
}, e -> { log.error("Failed to query need undeploy models, no action will be performed", e); });
221221
queryRunningModels(listener);
222222
}
223223

@@ -241,7 +241,9 @@ private void queryRunningModels(ActionListener<SearchResponse> listener) {
241241
String[] includes = new String[] {
242242
MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD,
243243
MLModel.PLANNING_WORKER_NODES_FIELD,
244-
MLModel.DEPLOY_TO_ALL_NODES_FIELD };
244+
MLModel.DEPLOY_TO_ALL_NODES_FIELD,
245+
MLModel.FUNCTION_NAME_FIELD,
246+
MLModel.ALGORITHM_FIELD };
245247

246248
String[] excludes = new String[] { MLModel.MODEL_CONTENT_FIELD, MLModel.OLD_MODEL_CONTENT_FIELD };
247249
FetchSourceContext fetchContext = new FetchSourceContext(true, includes, excludes);
@@ -257,12 +259,24 @@ private void queryRunningModels(ActionListener<SearchResponse> listener) {
257259

258260
@SuppressWarnings("unchecked")
259261
private void triggerModelRedeploy(ModelAutoRedeployArrangement modelAutoRedeployArrangement) {
262+
if (modelAutoRedeployArrangement == null) {
263+
log.info("No more models in arrangement, skipping the redeployment");
264+
return;
265+
}
260266
String modelId = modelAutoRedeployArrangement.getSearchResponse().getId();
261267
List<String> addedNodes = modelAutoRedeployArrangement.getAddedNodes();
262268
Map<String, Object> sourceAsMap = modelAutoRedeployArrangement.getSearchResponse().getSourceAsMap();
263269
String functionName = (String) Optional
264270
.ofNullable(sourceAsMap.get(MLModel.FUNCTION_NAME_FIELD))
265271
.orElse(sourceAsMap.get(MLModel.ALGORITHM_FIELD));
272+
if (functionName == null) {
273+
log
274+
.error(
275+
"Model function_name or algorithm is null, model is not in correct status, please check the model, model id is: {}",
276+
modelId
277+
);
278+
return;
279+
}
266280
if (FunctionName.REMOTE == FunctionName.from(functionName)) {
267281
log.info("Skipping redeploying remote model {} as remote model deployment can be done at prediction time.", modelId);
268282
return;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
*
3+
* * Copyright OpenSearch Contributors
4+
* * SPDX-License-Identifier: Apache-2.0
5+
*
6+
*/
7+
8+
package org.opensearch.ml.autoredeploy;
9+
10+
import static org.opensearch.ml.common.MLTask.MODEL_ID_FIELD;
11+
12+
import java.nio.file.Files;
13+
import java.nio.file.Path;
14+
15+
import org.opensearch.ml.common.MLTaskState;
16+
import org.opensearch.ml.rest.MLCommonsRestTestCase;
17+
18+
import lombok.SneakyThrows;
19+
20+
public class MLModelAutoReDeployerIT extends MLCommonsRestTestCase {
21+
22+
public void testModelAutoRedeploy() {
23+
prepareModel();
24+
}
25+
26+
@SneakyThrows
27+
private void prepareModel() {
28+
String requestBody = Files
29+
.readString(
30+
Path.of(this.getClass().getClassLoader().getResource("org/opensearch/ml/autoredeploy/TracedSmallModelRequest.json").toURI())
31+
);
32+
String registerFirstModelTaskId = registerModel(requestBody);
33+
String registerSecondModelTaskId = registerModel(requestBody);
34+
waitForTask(registerFirstModelTaskId, MLTaskState.COMPLETED);
35+
getTask(client(), registerFirstModelTaskId, response -> {
36+
String firstModelId = (String) response.get(MODEL_ID_FIELD);
37+
try {
38+
String deployFirstModelTaskId = deployModel(firstModelId);
39+
getTask(client(), registerSecondModelTaskId, response1 -> {
40+
String secondModelId = (String) response1.get(MODEL_ID_FIELD);
41+
try {
42+
/**
43+
* At this time point, the model auto redeployer should be querying the deploying/deploy failed/partially deployed models.
44+
* The original deploy model task should be able to complete successfully, if not it means the
45+
* org.opensearch.ml.action.forward.TransportForwardAction.triggerNextModelDeployAndCheckIfRestRetryTimes might throw exception
46+
* which cause by org.opensearch.ml.autoredeploy.MLModelAutoReDeployer#redeployAModel. The auto redeploy constructs an arrangement
47+
* with two models, the first model deploy done event will trigger the auto redeploy's next model deploy, and if during this
48+
* any error occurs, the first model deploy task status won't be updated to complete. So if this IT can pass, then it means the
49+
* next model auto redeploy trigger is correct.
50+
*/
51+
String deploySecondModelTaskId = deployModel(secondModelId);
52+
waitForTask(deploySecondModelTaskId, MLTaskState.COMPLETED);
53+
} catch (Exception e) {
54+
fail(e.getMessage());
55+
}
56+
});
57+
waitForTask(deployFirstModelTaskId, MLTaskState.COMPLETED);
58+
} catch (Exception e) {
59+
logger.error(e.getMessage(), e);
60+
fail(e.getMessage());
61+
}
62+
});
63+
}
64+
65+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"name": "traced_small_model",
3+
"version": "1.0.0",
4+
"model_format": "TORCH_SCRIPT",
5+
"model_task_type": "text_embedding",
6+
"model_content_hash_value": "e13b74006290a9d0f58c1376f9629d4ebc05a0f9385f40db837452b167ae9021",
7+
"model_config": {
8+
"model_type": "bert",
9+
"embedding_dimension": 768,
10+
"framework_type": "sentence_transformers",
11+
"all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}"
12+
},
13+
"url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true"
14+
}

0 commit comments

Comments
 (0)