Skip to content

Commit

Permalink
Merge pull request #164 from runpod-workers/up-0.7.3
Browse files Browse the repository at this point in the history
Update vLLM to 0.7.3 and set a default max_tokens of 100 in SamplingParams
  • Loading branch information
pandyamarut authored Feb 24, 2025
2 parents 6dcf39e + 56dc4ad commit 389fad7
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
python3 -m pip install --upgrade -r /requirements.txt

# Install vLLM (switching back to pip installs since issues that required building fork are fixed and space optimization is not as important since caching) and FlashInfer
RUN python3 -m pip install vllm==0.7.2 && \
RUN python3 -m pip install vllm==0.7.3 && \
python3 -m pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3

# Setup for Option 2: Building the Image with the Model included
Expand Down
2 changes: 1 addition & 1 deletion src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(self, job):
self.max_batch_size = job.get("max_batch_size")
self.apply_chat_template = job.get("apply_chat_template", False)
self.use_openai_format = job.get("use_openai_format", False)
self.sampling_params = SamplingParams(**job.get("sampling_params", {}))
self.sampling_params = SamplingParams(max_tokens=100, **job.get("sampling_params", {}))
self.request_id = random_uuid()
batch_size_growth_factor = job.get("batch_size_growth_factor")
self.batch_size_growth_factor = float(batch_size_growth_factor) if batch_size_growth_factor else None
Expand Down

0 comments on commit 389fad7

Please sign in to comment.