Skip to content

Commit

Permalink
optimize dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
nlueem committed Aug 3, 2024
1 parent 1617581 commit 4fbdb8d
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 13 deletions.
35 changes: 24 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,28 +1,41 @@
# NOTE(review): this span is a GitHub diff rendering — removed (pre-commit) and
# added (post-commit) lines are interleaved with no +/- markers, so the text
# below is not a runnable Dockerfile exactly as copied.

# adapt cuda version based on your cuda-version, TERMINAL: `nvidia-smi`
# Use the official CUDA runtime base image, check it via nvidia-smi
FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu20.04

# --- removed by this commit: one apt-get layer per package; the later installs
# --- reuse the apt index cached in an earlier layer (classic stale-cache bug)
# Set up environment
RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl
RUN apt-get install -y git
RUN apt-get install unzip
RUN apt-get -y install python3
RUN apt-get -y install python3-pip
# --- added by this commit: single layer doing update + install + cleanup,
# --- so the apt lists never persist into the image
# Set up environment and install necessary packages in one RUN command to reduce the number of layers
RUN apt-get update && \
apt-get install --no-install-recommends --no-install-suggests -y \
curl \
git \
unzip \
python3 \
python3-pip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Copy our application code
# Set the working directory
# (WORKDIR creates the directory if missing; no separate mkdir is needed)
WORKDIR /restful-llama-3

# Copy requirements first to leverage Docker layer caching
# (the pip layer is rebuilt only when requirements.txt changes, not on every
# source edit)
COPY ./requirements.txt /restful-llama-3
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the rest of the application code
COPY . .

# Create cache directory and set permissions
# Create model cache directory and set permissions
# NOTE(review): chmod -R 777 makes the cache world-writable; a least-privilege
# alternative is a dedicated non-root USER plus --chown — confirm what the
# runtime user actually needs before tightening.
RUN mkdir -p /restful-llama-3/cache && chmod -R 777 /restful-llama-3/cache
# Make start_app.sh executable

# Make the start script executable
RUN chmod +x /restful-llama-3/start_app.sh

# Set environment variables
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
# Point Hugging Face model/dataset caches at the directory prepared above
ENV HF_HOME=/restful-llama-3/cache
ENV HF_DATASETS_CACHE=/restful-llama-3/cache

# Expose HF port
# (EXPOSE is documentation only; the port must still be published at run time)
EXPOSE 7860
# NOTE(review): two identical CMD lines appear below because the diff shows
# both the removed and the added instruction; the committed file keeps exactly
# one (only the last CMD in a Dockerfile takes effect anyway).
CMD ["./start_app.sh"]

# Specify the command to run the application
CMD ["./start_app.sh"]
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def is_system_prompt(msg):
# Liveness endpoint at the application root.
# NOTE(review): diff rendering — the first `return` below is the removed
# (pre-commit) body and the second is the added one; per the commit stats
# (1 addition, 1 deletion) the committed function returns only the HTML string.
# Indentation was stripped by the page scrape.
@app.get("/")
def root():
"Liveness endpoint: confirms the API is up and serving requests."
return {"message": "Up and running"}
return "<h1>FastAPI Up</h1>"

@app.post("/chat")
def chat(chat_request: ChatRequest):
Expand Down
1 change: 0 additions & 1 deletion start_app.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
# Start the app
# Each worker takes up roughly 20GB RAM to load the model
# Bind gunicorn on all interfaces at port 7860 (matches the Dockerfile's
# EXPOSE 7860), serving the `app` object from app.py through uvicorn's ASGI
# worker class; --timeout 600 allows slow model loading/generation up to
# 10 minutes before gunicorn considers a worker hung and restarts it.
# NOTE(review): no shebang is visible in this hunk; the Dockerfile launches
# this script via exec-form CMD ["./start_app.sh"], which requires one —
# confirm the full file starts with e.g. #!/bin/bash.
gunicorn -b 0.0.0.0:7860 app:app --workers 1 -k uvicorn.workers.UvicornWorker --timeout 600

0 comments on commit 4fbdb8d

Please sign in to comment.