diff --git a/src/engine.py b/src/engine.py
index 0c20dc3..d170edf 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -176,7 +176,8 @@ async def _handle_chat_or_completion_request(self, openai_request: JobInput):
             yield create_error_response(str(e)).model_dump()
             return
 
-        response_generator = await generator_function(request, raw_request=None)
+        dummy_request = DummyRequest()
+        response_generator = await generator_function(request, raw_request=dummy_request)
 
         if not openai_request.openai_input.get("stream") or isinstance(response_generator, ErrorResponse):
             yield response_generator.model_dump()
diff --git a/src/utils.py b/src/utils.py
index efe7611..220be90 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -49,9 +49,11 @@ def __init__(self, job):
         self.openai_input = job.get("openai_input")
 
 class DummyRequest:
+    def __init__(self):
+        self.headers = {}
     async def is_disconnected(self):
         return False
-
+    
 class BatchSize:
     def __init__(self, max_batch_size, min_batch_size, batch_size_growth_factor):
         self.max_batch_size = max_batch_size