Skip to content

Commit 0755e50

Browse files
authored
Fix for controller error stack trace and tokenbucket (opensearch-project#1978)
* Fix for controller error stack trace and tokenbucket

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

* Adjust logging level

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

* Fix concern

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

* Fix spotless

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>

---------

Signed-off-by: Sicheng Song <sicheng.song@outlook.com>
1 parent f63c4df commit 0755e50

File tree

2 files changed

+54
-17
lines changed

2 files changed

+54
-17
lines changed

plugin/src/main/java/org/opensearch/ml/action/models/DeleteModelTransportAction.java

+9-3
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ public void onResponse(DeleteResponse deleteResponse) {
251251
@Override
252252
public void onFailure(Exception e) {
253253
if (e instanceof IndexNotFoundException) {
254-
log.info("Model controller not deleted due to no model controller was found for model: " + modelId);
254+
log.info("Model controller not deleted due to no model controller found for model: " + modelId);
255255
actionListener.onFailure(e);
256256
} else {
257257
log.error("Failed to delete model controller for model: " + modelId, e);
@@ -272,9 +272,15 @@ private void deleteController(String modelId) {
272272
if (deleteResponse.getResult() == DocWriteResponse.Result.DELETED) {
273273
log.info("Model controller for model {} successfully deleted from index, result: {}", modelId, deleteResponse.getResult());
274274
} else {
275-
log.warn("The deletion of model controller for model {} returned with result: {}", modelId, deleteResponse.getResult());
275+
log.info("The deletion of model controller for model {} returned with result: {}", modelId, deleteResponse.getResult());
276276
}
277-
}, e -> log.error("Failed to re-deploy the model controller for model: " + modelId, e)));
277+
}, e -> {
278+
if (e instanceof IndexNotFoundException) {
279+
log.debug("Model controller not deleted due to no model controller found for model: " + modelId);
280+
} else {
281+
log.error("Failed to delete model controller for model: " + modelId, e);
282+
}
283+
}));
278284
}
279285

280286
private Boolean isModelNotDeployed(MLModelState mlModelState) {

plugin/src/main/java/org/opensearch/ml/model/MLModelManager.java

+45-14
Original file line numberDiff line numberDiff line change
@@ -1233,14 +1233,14 @@ public synchronized void undeployController(String modelId, ActionListener<Strin
12331233
} else if (isModelRunningOnNode(modelId)) {
12341234
log
12351235
.error(
1236-
"Failed to undeploy model controller due to model is in ML cache but with a state other than deployed. Please check model: "
1236+
"Failed to undeploy model controller because model is in ML cache but with a state other than deployed. Please check model: "
12371237
+ modelId,
12381238
new RuntimeException()
12391239
);
12401240
listener
12411241
.onFailure(
12421242
new RuntimeException(
1243-
"Failed to undeploy model controller due to model is in ML cache but with a state other than deployed. Please check model: "
1243+
"Failed to undeploy model controller because model is in ML cache but with a state other than deployed. Please check model: "
12441244
+ modelId
12451245
)
12461246
);
@@ -1276,25 +1276,47 @@ private synchronized void deployControllerWithDeployingModel(
12761276
log.error("Failed to parse ml task" + r.getId(), e);
12771277
listener.onFailure(e);
12781278
}
1279-
} else if (mlModel.getIsControllerEnabled() == null || !mlModel.getIsControllerEnabled()) {
1279+
} else if (!BooleanUtils.isTrue(mlModel.getIsControllerEnabled())) {
12801280
// Not responding with a failure here because the model deploy can still work
12811281
// well
12821282
listener
12831283
.onResponse(
1284-
"The model "
1284+
"No controller is deployed because the model "
12851285
+ modelId
1286-
+ " is expected not having a model controller. Please use the create model controller api to create one if this is unexpected."
1286+
+ " is expected not having an enabled model controller. Please use the create controller api to create one if this is unexpected."
12871287
);
12881288
log
1289-
.warn(
1290-
"The model "
1291-
+ modelId
1292-
+ " is expected not having a model controller. Please use the create model controller api to create one if this is unexpected."
1289+
.debug(
1290+
"No controller is deployed because the model " + modelId + " is expected not having an enabled model controller."
12931291
);
12941292
} else {
12951293
listener.onFailure(new OpenSearchStatusException("Failed to find model controller", RestStatus.NOT_FOUND));
12961294
}
1297-
}, listener::onFailure));
1295+
}, e -> {
1296+
if (e instanceof IndexNotFoundException) {
1297+
if (!BooleanUtils.isTrue(mlModel.getIsControllerEnabled())) {
1298+
// Not responding with a failure here because the model deploy can still work
1299+
// well
1300+
listener
1301+
.onResponse(
1302+
"No controller is deployed because the model "
1303+
+ modelId
1304+
+ " is expected not having an enabled model controller. Please use the create model controller api to create one if this is unexpected."
1305+
);
1306+
log
1307+
.debug(
1308+
"No controller is deployed because the model "
1309+
+ modelId
1310+
+ " is expected not having an enabled model controller."
1311+
);
1312+
} else {
1313+
listener.onFailure(new OpenSearchStatusException("Failed to find model controller", RestStatus.NOT_FOUND));
1314+
}
1315+
} else {
1316+
log.error("Failed to re-deploy the model controller for model: " + modelId, e);
1317+
listener.onFailure(e);
1318+
}
1319+
}));
12981320
}
12991321

13001322
/**
@@ -1315,10 +1337,13 @@ public void deployControllerWithDeployingModel(MLModel mlModel, Integer eligible
13151337
deployControllerWithDeployingModel(mlModel, eligibleNodeCount, ActionListener.wrap(response -> {
13161338
if (response.startsWith("Successfully")) {
13171339
log.debug(response, mlModel.getModelId());
1318-
} else if (response.startsWith("Failed")) {
1319-
log.error(response);
1340+
} else if (response
1341+
.endsWith(
1342+
"is expected not having a model controller. Please use the create model controller api to create one if this is unexpected."
1343+
)) {
1344+
log.warn(response);
13201345
} else {
1321-
log.info(response);
1346+
log.error(response);
13221347
}
13231348
}, e -> log.error("Failed to re-deploy the model controller for model: " + mlModel.getModelId(), e)));
13241349
}
@@ -1364,7 +1389,13 @@ private TokenBucket createTokenBucket(Integer eligibleNodeCount, MLRateLimiter r
13641389
limit / unit.toSeconds(1),
13651390
eligibleNodeCount
13661391
);
1367-
return new TokenBucket(System::nanoTime, limit / unit.toNanos(1) / eligibleNodeCount, limit, limit / eligibleNodeCount);
1392+
// Burst tokens must be at least 1 for a request to be accepted
1393+
return new TokenBucket(
1394+
System::nanoTime,
1395+
limit / unit.toNanos(1) / eligibleNodeCount,
1396+
Math.max(limit / eligibleNodeCount, 1),
1397+
Math.max(limit / eligibleNodeCount, 1)
1398+
);
13681399
}
13691400
return null;
13701401
}

0 commit comments

Comments
 (0)