diff --git a/Dockerfile b/Dockerfile
index a164055..399bcbd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,41 @@
-# adapt cuda version based on your cuda-version, TERMINAL: `nvidia-smi`
+# Use the official CUDA runtime base image, check it via nvidia-smi
 FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu20.04
 
-# Set up environment
-RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl
-RUN apt-get install -y git
-RUN apt-get install unzip
-RUN apt-get -y install python3
-RUN apt-get -y install python3-pip
+# Set up environment and install necessary packages in one RUN command to reduce the number of layers
+RUN apt-get update && \
+    apt-get install --no-install-recommends --no-install-suggests -y \
+    curl \
+    git \
+    unzip \
+    python3 \
+    python3-pip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
-# Copy our application code
+# Set the working directory
 WORKDIR /restful-llama-3
+
+# Copy requirements first to leverage Docker layer caching
 COPY ./requirements.txt /restful-llama-3
 RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code
 COPY . .
 
-# Create cache directory and set permissions
+# Create model cache directory and set permissions
 RUN mkdir -p /restful-llama-3/cache && chmod -R 777 /restful-llama-3/cache
-# Make start_app.sh executable
+
+# Make the start script executable
 RUN chmod +x /restful-llama-3/start_app.sh
 
+# Set environment variables
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
 ENV HF_HOME=/restful-llama-3/cache
 ENV HF_DATASETS_CACHE=/restful-llama-3/cache
 
+# Expose HF port
 EXPOSE 7860
-CMD ["./start_app.sh"]
\ No newline at end of file
+
+# Specify the command to run the application
+CMD ["./start_app.sh"]
diff --git a/app.py b/app.py
index 74ac40b..84e7658 100644
--- a/app.py
+++ b/app.py
@@ -135,7 +135,7 @@ def is_system_prompt(msg):
 @app.get("/")
 def root():
     "Started endpoint message"
-    return {"message": "Up and running"}
+    return "<h1>FastAPI Up</h1>"
 
 @app.post("/chat")
 def chat(chat_request: ChatRequest):
diff --git a/start_app.sh b/start_app.sh
index a1a59a8..ba07499 100755
--- a/start_app.sh
+++ b/start_app.sh
@@ -1,3 +1,2 @@
-# Start the app
 # Each worker takes up roughly 20GB RAM to load the model
 gunicorn -b 0.0.0.0:7860 app:app --workers 1 -k uvicorn.workers.UvicornWorker --timeout 600
\ No newline at end of file