forked from tolgaakar/restful-llama-3-8b
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
25 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,41 @@ | ||
# Adapt the CUDA version below to match your own CUDA version; check it by running `nvidia-smi` in a terminal | ||
# Use the official CUDA runtime base image; check your CUDA version via nvidia-smi | ||
FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu20.04 | ||
|
||
# Set up environment | ||
RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl | ||
RUN apt-get install -y git | ||
RUN apt-get install unzip | ||
RUN apt-get -y install python3 | ||
RUN apt-get -y install python3-pip | ||
# Set up environment and install necessary packages in one RUN command to reduce the number of layers | ||
RUN apt-get update && \ | ||
apt-get install --no-install-recommends --no-install-suggests -y \ | ||
curl \ | ||
git \ | ||
unzip \ | ||
python3 \ | ||
python3-pip && \ | ||
apt-get clean && \ | ||
rm -rf /var/lib/apt/lists/* | ||
|
||
# Copy our application code | ||
# Set the working directory | ||
WORKDIR /restful-llama-3 | ||
|
||
# Copy requirements first to leverage Docker layer caching | ||
COPY ./requirements.txt /restful-llama-3 | ||
RUN pip3 install --no-cache-dir -r requirements.txt | ||
|
||
# Copy the rest of the application code | ||
COPY . . | ||
|
||
# Create cache directory and set permissions | ||
# Create model cache directory and set permissions | ||
RUN mkdir -p /restful-llama-3/cache && chmod -R 777 /restful-llama-3/cache | ||
# Make start_app.sh executable | ||
|
||
# Make the start script executable | ||
RUN chmod +x /restful-llama-3/start_app.sh | ||
|
||
# Set environment variables | ||
ENV LC_ALL=C.UTF-8 | ||
ENV LANG=C.UTF-8 | ||
ENV HF_HOME=/restful-llama-3/cache | ||
ENV HF_DATASETS_CACHE=/restful-llama-3/cache | ||
|
||
# Expose the HF (Hugging Face) port, 7860, which the app binds to | ||
EXPOSE 7860 | ||
CMD ["./start_app.sh"] | ||
|
||
# Specify the command to run the application | ||
CMD ["./start_app.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,2 @@ | ||
# Start the app | ||
# Each worker takes up roughly 20GB RAM to load the model | ||
gunicorn -b 0.0.0.0:7860 app:app --workers 1 -k uvicorn.workers.UvicornWorker --timeout 600 |