Skip to content

Commit c9da36b

Browse files
ylwu-amzn authored and dhrubo-os committed
* fixing metrics
* addressing comments
* addressing comments
* updating test
* added IllegalArgumentException in the if statement
* addressing comments
* fixing spotless

---------

Signed-off-by: Dhrubo Saha <dhrubo@amazon.com>
Co-authored-by: Dhrubo Saha <dhrubo@amazon.com>
1 parent 598ea84 commit c9da36b

File tree

3 files changed

+26
-10
lines changed

3 files changed

+26
-10
lines changed

plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java

+20-7
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
import org.opensearch.ml.common.MLTask;
9696
import org.opensearch.ml.common.connector.Connector;
9797
import org.opensearch.ml.common.exception.MLException;
98+
import org.opensearch.ml.common.exception.MLLimitExceededException;
9899
import org.opensearch.ml.common.exception.MLResourceNotFoundException;
99100
import org.opensearch.ml.common.exception.MLValidationException;
100101
import org.opensearch.ml.common.model.MLModelState;
@@ -318,9 +319,10 @@ private void uploadMLModelMeta(MLRegisterModelMetaInput mlRegisterModelMetaInput
318319
* @param mlTask ML task
319320
*/
320321
public void registerMLModel(MLRegisterModelInput registerModelInput, MLTask mlTask) {
321-
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment();
322+
322323
checkAndAddRunningTask(mlTask, maxRegisterTasksPerNode);
323324
try {
325+
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment();
324326
mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment();
325327
mlStats.createCounterStatIfAbsent(mlTask.getFunctionName(), REGISTER, ML_ACTION_REQUEST_COUNT).increment();
326328

@@ -380,7 +382,6 @@ public void registerMLModel(MLRegisterModelInput registerModelInput, MLTask mlTa
380382
handleException(registerModelInput.getFunctionName(), mlTask.getTaskId(), e);
381383
}
382384
} catch (Exception e) {
383-
mlStats.createCounterStatIfAbsent(mlTask.getFunctionName(), REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment();
384385
handleException(registerModelInput.getFunctionName(), mlTask.getTaskId(), e);
385386
} finally {
386387
mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment();
@@ -392,9 +393,9 @@ private void indexRemoteModel(MLRegisterModelInput registerModelInput, MLTask ml
392393
FunctionName functionName = mlTask.getFunctionName();
393394
try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) {
394395
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment();
395-
396396
mlStats.createCounterStatIfAbsent(functionName, REGISTER, ML_ACTION_REQUEST_COUNT).increment();
397397
mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment();
398+
398399
String modelName = registerModelInput.getModelName();
399400
String version = modelVersion == null ? registerModelInput.getVersion() : modelVersion;
400401
Instant now = Instant.now();
@@ -462,7 +463,6 @@ private void registerModelFromUrl(MLRegisterModelInput registerModelInput, MLTas
462463
FunctionName functionName = mlTask.getFunctionName();
463464
try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) {
464465
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment();
465-
466466
mlStats.createCounterStatIfAbsent(functionName, REGISTER, ML_ACTION_REQUEST_COUNT).increment();
467467
mlStats.getStat(MLNodeLevelStat.ML_NODE_EXECUTING_TASK_COUNT).increment();
468468
String modelName = registerModelInput.getModelName();
@@ -689,7 +689,12 @@ private void deleteModel(String modelId) {
689689
}
690690

691691
private void handleException(FunctionName functionName, String taskId, Exception e) {
692-
mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment();
692+
if (!(e instanceof MLLimitExceededException)
693+
&& !(e instanceof MLResourceNotFoundException)
694+
&& !(e instanceof IllegalArgumentException)) {
695+
mlStats.createCounterStatIfAbsent(functionName, REGISTER, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment();
696+
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment();
697+
}
693698
Map<String, Object> updated = ImmutableMap.of(ERROR_FIELD, MLExceptionUtils.getRootCauseMessage(e), STATE_FIELD, FAILED);
694699
mlTaskManager.updateMLTask(taskId, updated, TIMEOUT_IN_MILLIS, true);
695700
}
@@ -713,6 +718,7 @@ public void deployModel(
713718
ActionListener<String> listener
714719
) {
715720
mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, ML_ACTION_REQUEST_COUNT).increment();
721+
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment();
716722
List<String> workerNodes = mlTask.getWorkerNodes();
717723
if (modelCacheHelper.isModelDeployed(modelId)) {
718724
if (workerNodes != null && workerNodes.size() > 0) {
@@ -835,7 +841,13 @@ public void deployModel(
835841
}
836842

837843
private void handleDeployModelException(String modelId, FunctionName functionName, ActionListener<String> listener, Exception e) {
838-
mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment();
844+
845+
if (!(e instanceof MLLimitExceededException)
846+
&& !(e instanceof MLResourceNotFoundException)
847+
&& !(e instanceof IllegalArgumentException)) {
848+
mlStats.createCounterStatIfAbsent(functionName, ActionName.DEPLOY, MLActionLevelStat.ML_ACTION_FAILURE_COUNT).increment();
849+
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_FAILURE_COUNT).increment();
850+
}
839851
removeModel(modelId);
840852
listener.onFailure(e);
841853
}
@@ -858,7 +870,7 @@ public void getModel(String modelId, ActionListener<MLModel> listener) {
858870
}
859871

860872
/**
861-
* Get model from model index with includes/exludes filter.
873+
* Get model from model index with includes/excludes filter.
862874
*
863875
* @param modelId model id
864876
* @param includes fields included
@@ -1045,6 +1057,7 @@ public synchronized Map<String, String> undeployModel(String[] modelIds) {
10451057
if (modelCacheHelper.isModelDeployed(modelId)) {
10461058
modelUndeployStatus.put(modelId, UNDEPLOYED);
10471059
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_MODEL_COUNT).decrement();
1060+
mlStats.getStat(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT).increment();
10481061
mlStats
10491062
.createCounterStatIfAbsent(getModelFunctionName(modelId), ActionName.UNDEPLOY, ML_ACTION_REQUEST_COUNT)
10501063
.increment();

plugin/src/main/java/org/opensearch/ml/utils/MLExceptionUtils.java

+4-2
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,11 @@ public static String toJsonString(Map<String, String> nodeErrors) throws IOExcep
4343

4444
public static void logException(String errorMessage, Exception e, Logger log) {
4545
Throwable rootCause = ExceptionUtils.getRootCause(e);
46-
if (e instanceof MLLimitExceededException || e instanceof MLResourceNotFoundException) {
46+
if (e instanceof MLLimitExceededException || e instanceof MLResourceNotFoundException || e instanceof IllegalArgumentException) {
4747
log.warn(e.getMessage());
48-
} else if (rootCause instanceof MLLimitExceededException || rootCause instanceof MLResourceNotFoundException) {
48+
} else if (rootCause instanceof MLLimitExceededException
49+
|| rootCause instanceof MLResourceNotFoundException
50+
|| rootCause instanceof IllegalArgumentException) {
4951
log.warn(rootCause.getMessage());
5052
} else {
5153
log.error(errorMessage, e);

plugin/src/test/java/org/opensearch/ml/model/MLModelManagerTests.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -733,7 +733,8 @@ private void testDeployModel_FailedToRetrieveModelChunks(boolean lastChunk) {
733733

734734
modelManager.deployModel(modelId, modelContentHashValue, functionName, true, mlTask, listener);
735735
verify(modelCacheHelper).removeModel(eq(modelId));
736-
verify(mlStats).createCounterStatIfAbsent(eq(functionName), eq(ActionName.DEPLOY), eq(MLActionLevelStat.ML_ACTION_FAILURE_COUNT));
736+
verify(mlStats).createCounterStatIfAbsent(eq(functionName), eq(ActionName.DEPLOY), eq(MLActionLevelStat.ML_ACTION_REQUEST_COUNT));
737+
verify(mlStats).getStat(eq(MLNodeLevelStat.ML_NODE_TOTAL_REQUEST_COUNT));
737738
}
738739

739740
private void mock_client_index_ModelChunkFailure(Client client, String modelId) {

0 commit comments

Comments
 (0)