forked from tolgaakar/restful-llama-3-8b
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDockerfile
41 lines (32 loc) · 1.12 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Base image: NVIDIA's CUDA 11.4.3 + cuDNN 8 runtime on Ubuntu 20.04.
# NOTE(review): the original note says to "check it via nvidia-smi" — presumably
# meaning the host driver's supported CUDA version should be confirmed with
# nvidia-smi before choosing this tag; confirm with the author.
FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu20.04

# Install the OS-level tools and the Python toolchain in a single layer.
# The apt package index is refreshed and removed inside the same RUN so that
# neither a stale index nor the downloaded lists persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends --no-install-suggests \
        curl \
        git \
        python3 \
        python3-pip \
        unzip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# All following instructions (and the running container) operate from here;
# WORKDIR creates the directory if it does not exist yet.
WORKDIR /restful-llama-3

# Bring in only the dependency manifest before the rest of the sources, so the
# pip layer below is rebuilt only when requirements.txt itself changes.
COPY requirements.txt ./

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir --requirement requirements.txt
# Add the remaining build-context files into the working directory
# (/restful-llama-3).
COPY . .

# Prepare the image filesystem for runtime in one layer:
#   - create the model cache directory and open it to every user (mode 777),
#     so the process can write to it whatever UID it runs under;
#   - mark the launcher script as executable.
# NOTE(review): 777 is very permissive — presumably required because the
# runtime UID is not known at build time; tighten if a fixed USER is adopted.
RUN mkdir -p /restful-llama-3/cache \
    && chmod -R 777 /restful-llama-3/cache \
    && chmod +x /restful-llama-3/start_app.sh
# Runtime environment, declared in a single instruction:
#   - LC_ALL / LANG select the C.UTF-8 locale;
#   - HF_HOME / HF_DATASETS_CACHE both point at /restful-llama-3/cache
#     (presumably so Hugging Face model and dataset downloads land there).
ENV LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    HF_HOME=/restful-llama-3/cache \
    HF_DATASETS_CACHE=/restful-llama-3/cache

# Document the port the service is expected to listen on. EXPOSE does not
# publish the port by itself; the original comment labels it the "HF port".
EXPOSE 7860/tcp

# Default command in exec form: run the launcher script from the working
# directory directly, without an intermediate shell.
CMD ["./start_app.sh"]