diff --git a/Dockerfile b/Dockerfile
index a164055..399bcbd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,28 +1,41 @@
-# adapt cuda version based on your cuda-version, TERMINAL: `nvidia-smi`
+# Use the official CUDA runtime base image, check it via nvidia-smi
 FROM nvidia/cuda:11.4.3-cudnn8-runtime-ubuntu20.04
 
-# Set up environment
-RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y curl
-RUN apt-get install -y git
-RUN apt-get install unzip
-RUN apt-get -y install python3
-RUN apt-get -y install python3-pip
+# Set up environment and install necessary packages in one RUN command to reduce the number of layers
+RUN apt-get update && \
+    apt-get install --no-install-recommends --no-install-suggests -y \
+    curl \
+    git \
+    unzip \
+    python3 \
+    python3-pip && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
 
-# Copy our application code
+# Set the working directory
 WORKDIR /restful-llama-3
+
+# Copy requirements first to leverage Docker layer caching
 COPY ./requirements.txt /restful-llama-3
 RUN pip3 install --no-cache-dir -r requirements.txt
+
+# Copy the rest of the application code
 COPY . .
 
-# Create cache directory and set permissions
+# Create model cache directory and set permissions
 RUN mkdir -p /restful-llama-3/cache && chmod -R 777 /restful-llama-3/cache
-# Make start_app.sh executable
+
+# Make the start script executable
 RUN chmod +x /restful-llama-3/start_app.sh
 
+# Set environment variables
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
 ENV HF_HOME=/restful-llama-3/cache
 ENV HF_DATASETS_CACHE=/restful-llama-3/cache
 
+# Expose HF port
 EXPOSE 7860
-CMD ["./start_app.sh"]
\ No newline at end of file
+
+# Specify the command to run the application
+CMD ["./start_app.sh"]
diff --git a/app.py b/app.py
index 74ac40b..84e7658 100644
--- a/app.py
+++ b/app.py
@@ -135,7 +135,7 @@ def is_system_prompt(msg):
 @app.get("/")
 def root():
     "Started endpoint message"
-    return {"message": "Up and running"}
+    return "<h1>FastAPI Up</h1>"
 
 @app.post("/chat")
 def chat(chat_request: ChatRequest):
diff --git a/start_app.sh b/start_app.sh
index a1a59a8..ba07499 100755
--- a/start_app.sh
+++ b/start_app.sh
@@ -1,3 +1,2 @@
-# Start the app
 # Each worker takes up roughly 20GB RAM to load the model
 gunicorn -b 0.0.0.0:7860 app:app --workers 1 -k uvicorn.workers.UvicornWorker --timeout 600
\ No newline at end of file