Commit: dynamic lora loading
Signed-off-by: pandyamarut <pandyamarut@gmail.com>
pandyamarut committed Jan 29, 2025
1 parent 610429d commit c703254
Showing 2 changed files with 11 additions and 10 deletions.
src/engine.py: 20 changes (11 additions, 9 deletions)
@@ -11,7 +11,7 @@
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest, CompletionRequest, ErrorResponse
-from vllm.entrypoints.openai.serving_engine import BaseModelPath, LoRAModulePath
+from vllm.entrypoints.openai.serving_models import BaseModelPath, LoRAModulePath, OpenAIServingModels


 from utils import DummyRequest, JobInput, BatchSize, create_error_response
@@ -138,28 +138,30 @@ async def _initialize_engines(self):
         except:
             lora_modules = None

+        self.serving_models = OpenAIServingModels(
+            engine_client=self.llm,
+            model_config=self.model_config,
+            base_model_paths=self.base_model_paths,
+            lora_modules=None,
+            prompt_adapters=None,
+        )

         self.chat_engine = OpenAIServingChat(
             engine_client=self.llm,
             model_config=self.model_config,
-            base_model_paths=self.base_model_paths,
+            models=self.serving_models,
             response_role=self.response_role,
             chat_template=self.tokenizer.tokenizer.chat_template,
             enable_auto_tools=os.getenv('ENABLE_AUTO_TOOL_CHOICE', 'false').lower() == 'true',
             tool_parser=os.getenv('TOOL_CALL_PARSER', "") or None,
-            lora_modules=lora_modules,
-            prompt_adapters=None,
             chat_template_content_format="auto",
             request_logger=None
         )
         self.completion_engine = OpenAIServingCompletion(
             engine_client=self.llm,
             model_config=self.model_config,
-            base_model_paths=self.base_model_paths,
-            lora_modules=lora_modules,
-            prompt_adapters=None,
+            models=self.serving_models,
             request_logger=None
         )

     async def generate(self, openai_request: JobInput):
@@ -172,7 +174,7 @@ async def generate(self, openai_request: JobInput):
             yield create_error_response("Invalid route").model_dump()

     async def _handle_model_request(self):
-        models = await self.chat_engine.show_available_models()
+        models = await self.serving_models.show_available_models()
         return models.model_dump()

     async def _handle_chat_or_completion_request(self, openai_request: JobInput):
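Context on the refactor: vLLM 0.7 moved model and LoRA bookkeeping out of the individual serving engines into a shared OpenAIServingModels registry, so an adapter registered once is visible to both the chat and completion engines and can be added at runtime, which is what makes the "dynamic lora loading" of this commit possible. Below is a minimal sketch (not part of this commit) of how a handler could load and unload adapters against the new self.serving_models object. It assumes vLLM >= 0.7, where LoadLoraAdapterRequest/UnloadLoraAdapterRequest live in vllm.entrypoints.openai.protocol and OpenAIServingModels exposes load_lora_adapter/unload_lora_adapter coroutines; the handler names here are hypothetical:

# Hypothetical handlers (not in this commit) for runtime LoRA management
# through the shared OpenAIServingModels instance.
from vllm.entrypoints.openai.protocol import (
    LoadLoraAdapterRequest,
    UnloadLoraAdapterRequest,
)

class DynamicLoraMixin:
    async def _handle_load_lora_request(self, name: str, path: str):
        # Register a new adapter without restarting the engine; the result
        # is a confirmation string, or an ErrorResponse if loading failed.
        request = LoadLoraAdapterRequest(lora_name=name, lora_path=path)
        return await self.serving_models.load_lora_adapter(request)

    async def _handle_unload_lora_request(self, name: str):
        # Remove a previously registered adapter by name.
        request = UnloadLoraAdapterRequest(lora_name=name)
        return await self.serving_models.unload_lora_adapter(request)

Once loaded this way, the adapter name appears in the list returned by _handle_model_request and can be passed as the model field of subsequent chat or completion requests.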
src/engine_args.py: 1 change (0 additions, 1 deletion)
@@ -91,7 +91,6 @@
     "disable_logprobs_during_spec_decoding": os.getenv('DISABLE_LOGPROBS_DURING_SPEC_DECODING', None),
     "otlp_traces_endpoint": os.getenv('OTLP_TRACES_ENDPOINT', None),
     "use_v2_block_manager": os.getenv('USE_V2_BLOCK_MANAGER', 'true'),
-    "limit_mm_per_prompt": convert_limit_mm_per_prompt(os.getenv('LIMIT_MM_PER_PROMPT', "image=1"))
 }

 def match_vllm_args(args):
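For context on the deleted line: the removed entry fed vLLM's limit_mm_per_prompt engine argument, which caps multimodal inputs per prompt. A sketch of what the repo's convert_limit_mm_per_prompt helper presumably does; this is an assumption, since the helper's actual implementation is not shown in this diff:

# Assumed behavior (not from this diff): parse an env string such as
# "image=1,video=2" into the mapping vLLM expects, e.g. {"image": 1, "video": 2}.
def convert_limit_mm_per_prompt(value: str) -> dict:
    limits = {}
    for pair in value.split(','):
        modality, _, count = pair.partition('=')
        limits[modality.strip()] = int(count)
    return limits

# Example: convert_limit_mm_per_prompt("image=1") -> {"image": 1}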
