Skip to content

Commit 67ffb32

Browse files
committed
Revert "Filter out remote model auto redeployment (opensearch-project#2976)" and (opensearch-project#3104) and add necessary checks to start sync-up in auto_redeployer (opensearch-project#3378)
Signed-off-by: Bhavana Goud Ramaram <rbhavna@amazon.com>
1 parent 33b534a commit 67ffb32

File tree

4 files changed

+18
-73
lines changed

4 files changed

+18
-73
lines changed

plugin/src/main/java/org/opensearch/ml/autoredeploy/MLModelAutoReDeployer.java

+17-24
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
import java.util.ArrayList;
1313
import java.util.Arrays;
1414
import java.util.List;
15-
import java.util.Map;
1615
import java.util.Optional;
1716
import java.util.Queue;
1817
import java.util.concurrent.ConcurrentLinkedQueue;
@@ -31,7 +30,6 @@
3130
import org.opensearch.core.common.Strings;
3231
import org.opensearch.index.IndexNotFoundException;
3332
import org.opensearch.index.query.TermsQueryBuilder;
34-
import org.opensearch.ml.common.FunctionName;
3533
import org.opensearch.ml.common.MLModel;
3634
import org.opensearch.ml.common.model.MLModelState;
3735
import org.opensearch.ml.common.transport.deploy.MLDeployModelAction;
@@ -186,6 +184,9 @@ private void triggerAutoDeployModels(List<String> addedNodes) {
186184
modelAutoRedeployArrangements.add(modelAutoRedeployArrangement);
187185
});
188186
redeployAModel();
187+
} else {
188+
log.info("Could not find any models in the index, not performing auto reloading!");
189+
startCronjobAndClearListener();
189190
}
190191
}, e -> {
191192
if (e instanceof IndexNotFoundException) {
@@ -241,9 +242,7 @@ private void queryRunningModels(ActionListener<SearchResponse> listener) {
241242
String[] includes = new String[] {
242243
MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD,
243244
MLModel.PLANNING_WORKER_NODES_FIELD,
244-
MLModel.DEPLOY_TO_ALL_NODES_FIELD,
245-
MLModel.FUNCTION_NAME_FIELD,
246-
MLModel.ALGORITHM_FIELD };
245+
MLModel.DEPLOY_TO_ALL_NODES_FIELD };
247246

248247
String[] excludes = new String[] { MLModel.MODEL_CONTENT_FIELD, MLModel.OLD_MODEL_CONTENT_FIELD };
249248
FetchSourceContext fetchContext = new FetchSourceContext(true, includes, excludes);
@@ -261,29 +260,22 @@ private void queryRunningModels(ActionListener<SearchResponse> listener) {
261260
private void triggerModelRedeploy(ModelAutoRedeployArrangement modelAutoRedeployArrangement) {
262261
if (modelAutoRedeployArrangement == null) {
263262
log.info("No more models in arrangement, skipping the redeployment");
263+
startCronjobAndClearListener();
264264
return;
265265
}
266266
String modelId = modelAutoRedeployArrangement.getSearchResponse().getId();
267267
List<String> addedNodes = modelAutoRedeployArrangement.getAddedNodes();
268-
Map<String, Object> sourceAsMap = modelAutoRedeployArrangement.getSearchResponse().getSourceAsMap();
269-
String functionName = (String) Optional
270-
.ofNullable(sourceAsMap.get(MLModel.FUNCTION_NAME_FIELD))
271-
.orElse(sourceAsMap.get(MLModel.ALGORITHM_FIELD));
272-
if (functionName == null) {
273-
log
274-
.error(
275-
"Model function_name or algorithm is null, model is not in correct status, please check the model, model id is: {}",
276-
modelId
277-
);
278-
return;
279-
}
280-
if (FunctionName.REMOTE == FunctionName.from(functionName)) {
281-
log.info("Skipping redeploying remote model {} as remote model deployment can be done at prediction time.", modelId);
282-
return;
283-
}
284-
List<String> planningWorkerNodes = (List<String>) sourceAsMap.get(MLModel.PLANNING_WORKER_NODES_FIELD);
285-
Integer autoRedeployRetryTimes = (Integer) sourceAsMap.get(MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD);
286-
Boolean deployToAllNodes = (Boolean) Optional.ofNullable(sourceAsMap.get(MLModel.DEPLOY_TO_ALL_NODES_FIELD)).orElse(false);
268+
List<String> planningWorkerNodes = (List<String>) modelAutoRedeployArrangement
269+
.getSearchResponse()
270+
.getSourceAsMap()
271+
.get(MLModel.PLANNING_WORKER_NODES_FIELD);
272+
Integer autoRedeployRetryTimes = (Integer) modelAutoRedeployArrangement
273+
.getSearchResponse()
274+
.getSourceAsMap()
275+
.get(MLModel.AUTO_REDEPLOY_RETRY_TIMES_FIELD);
276+
Boolean deployToAllNodes = (Boolean) Optional
277+
.ofNullable(modelAutoRedeployArrangement.getSearchResponse().getSourceAsMap().get(MLModel.DEPLOY_TO_ALL_NODES_FIELD))
278+
.orElse(false);
287279
// calculate node ids.
288280
String[] nodeIds = null;
289281
if (deployToAllNodes || !allowCustomDeploymentPlan) {
@@ -302,6 +294,7 @@ private void triggerModelRedeploy(ModelAutoRedeployArrangement modelAutoRedeploy
302294
.info(
303295
"Allow custom deployment plan is true and deploy to all nodes is false and added nodes are not in planning worker nodes list, not to auto redeploy the model to the new nodes!"
304296
);
297+
redeployAModel();
305298
return;
306299
}
307300

plugin/src/test/java/org/opensearch/ml/autoredeploy/MLModelAutoReDeployerTests.java

-28
Original file line numberDiff line numberDiff line change
@@ -609,34 +609,6 @@ public void test_redeployAModel_with_needRedeployArray_isEmpty() {
609609
mlModelAutoReDeployer.redeployAModel();
610610
}
611611

612-
public void test_buildAutoReloadArrangement_skippingRemoteModel_success() throws Exception {
613-
Settings settings = Settings
614-
.builder()
615-
.put(ML_COMMONS_ONLY_RUN_ON_ML_NODE.getKey(), true)
616-
.put(ML_COMMONS_MODEL_AUTO_REDEPLOY_LIFETIME_RETRY_TIMES.getKey(), 3)
617-
.put(ML_COMMONS_MODEL_AUTO_REDEPLOY_ENABLE.getKey(), true)
618-
.put(ML_COMMONS_ALLOW_CUSTOM_DEPLOYMENT_PLAN.getKey(), false)
619-
.build();
620-
621-
ClusterService clusterService = mock(ClusterService.class);
622-
when(clusterService.localNode()).thenReturn(localNode);
623-
when(clusterService.getClusterSettings()).thenReturn(getClusterSettings(settings));
624-
mockClusterDataNodes(clusterService);
625-
626-
mlModelAutoReDeployer = spy(
627-
new MLModelAutoReDeployer(clusterService, client, settings, mlModelManager, searchRequestBuilderFactory)
628-
);
629-
630-
SearchResponse searchResponse = buildDeployToAllNodesTrueSearchResponse("RemoteModelResult.json");
631-
doAnswer(invocation -> {
632-
ActionListener<SearchResponse> listener = invocation.getArgument(0);
633-
listener.onResponse(searchResponse);
634-
return null;
635-
}).when(searchRequestBuilder).execute(isA(ActionListener.class));
636-
mlModelAutoReDeployer.buildAutoReloadArrangement(addedNodes, clusterManagerNodeId);
637-
verify(client, never()).execute(any(MLDeployModelAction.class), any(MLDeployModelRequest.class), any(ActionListener.class));
638-
}
639-
640612
private SearchResponse buildDeployToAllNodesTrueSearchResponse(String file) throws Exception {
641613
MLModel mlModel = buildModelWithJsonFile(file);
642614
return createResponseWithModel(mlModel);

plugin/src/test/resources/org/opensearch/ml/autoredeploy/RemoteModelResult.json

-20
This file was deleted.

plugin/src/test/resources/org/opensearch/ml/autoredeploy/TracedSmallModelRequest.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,4 @@
1111
"all_config": "{\"architectures\":[\"BertModel\"],\"max_position_embeddings\":512,\"model_type\":\"bert\",\"num_attention_heads\":12,\"num_hidden_layers\":6}"
1212
},
1313
"url": "https://github.com/opensearch-project/ml-commons/blob/2.x/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/text_embedding/traced_small_model.zip?raw=true"
14-
}
14+
}

0 commit comments

Comments
 (0)