Skip to content

Commit a9173a3

Browse files
authored
Fix extra error print in failed load (#503)
* Fix extra error print in failed load
* Fix retries, printing, exception catching
* Update pyproject.toml
* Update pyproject.toml
1 parent d495531 commit a9173a3

File tree

4 files changed: +13 −5 lines changed

pyproject.toml

+1-1
```diff
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "truss"
-version = "0.5.4"
+version = "0.5.5"
 description = "A seamless bridge from model development to model delivery"
 license = "MIT"
 readme = "README.md"
```

truss/templates/control/control/endpoints.py

+3-2
```diff
@@ -5,7 +5,7 @@
 from fastapi import APIRouter
 from fastapi.responses import JSONResponse, StreamingResponse
 from helpers.errors import ModelLoadFailed, ModelNotReady
-from httpx import URL, ConnectError
+from httpx import URL, ConnectError, RemoteProtocolError
 from starlette.requests import Request
 from starlette.responses import Response
 from tenacity import Retrying, retry_if_exception_type, stop_after_attempt, wait_fixed
@@ -45,6 +45,7 @@ async def proxy(request: Request):
         retry=(
             retry_if_exception_type(ConnectError)
             | retry_if_exception_type(ModelNotReady)
+            | retry_if_exception_type(RemoteProtocolError)
         ),
         stop=stop_after_attempt(INFERENCE_SERVER_START_WAIT_SECS),
         wait=wait_fixed(1),
@@ -59,7 +60,7 @@ async def proxy(request: Request):

             if await _is_model_not_ready(resp):
                 raise ModelNotReady("Model has started running, but not ready yet.")
-        except ConnectionError as exp:
+        except (RemoteProtocolError, ConnectError) as exp:
             # This check is a bit expensive so we don't do it before every request, we
             # do it only if request fails with connection error. If the inference server
             # process is running then we continue waiting for it to start (by retrying),
```

truss/templates/control/control/helpers/inference_server_process_controller.py

+8-1
```diff
@@ -19,6 +19,7 @@ class InferenceServerProcessController:
     _inference_server_home: str
     _app_logger: logging.Logger
     _inference_server_process_args: List[str]
+    _logged_unrecoverable_since_last_restart: bool

     def __init__(
         self,
@@ -33,6 +34,7 @@ def __init__(
         self._inference_server_started = False
         self._inference_server_ever_started = False
         self._inference_server_terminated = False
+        self._logged_unrecoverable_since_last_restart = False
         self._app_logger = app_logger

     def start(self, inf_env: dict):
@@ -45,6 +47,7 @@ def start(self, inf_env: dict):

         self._inference_server_started = True
         self._inference_server_ever_started = True
+        self._logged_unrecoverable_since_last_restart = False

     def stop(self):
         if self._inference_server_process is not None:
@@ -106,4 +109,8 @@ def check_and_recover_inference_server(self, inf_env: dict):
             )
             self.start(inf_env)
         else:
-            self._app_logger.warning("Inference server unrecoverable. Try patching")
+            if not self._logged_unrecoverable_since_last_restart:
+                self._app_logger.warning(
+                    "Inference server unrecoverable. Try patching"
+                )
+                self._logged_unrecoverable_since_last_restart = True
```

truss/templates/server/model_wrapper.py

+1-1
```diff
@@ -20,7 +20,7 @@

 MODEL_BASENAME = "model"

-NUM_LOAD_RETRIES = int(os.environ.get("NUM_LOAD_RETRIES_TRUSS", "3"))
+NUM_LOAD_RETRIES = int(os.environ.get("NUM_LOAD_RETRIES_TRUSS", "1"))
 STREAMING_RESPONSE_QUEUE_READ_TIMEOUT_SECS = 60
 DEFAULT_PREDICT_CONCURRENCY = 1
```

0 commit comments

Comments (0)