Commit ed4f4e0

re-work PR changes instead of #328
Signed-off-by: Srikanth Ramakrishna <srikanth.ramakrishna@intel.com>
1 parent 262a89c

9 files changed: +302 −39

pytorch/Dockerfile

+90 −14

@@ -34,6 +34,7 @@ ARG BASE_IMAGE_TAG
 ARG PACKAGE_OPTION=pip
 ARG PYTHON_VERSION
 ARG PYTHON_BASE=${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER}-${BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${PACKAGE_OPTION}-py${PYTHON_VERSION}-base
+ARG TORCHSERVE_BASE=${PYTHON_BASE}
 FROM ${PYTHON_BASE} AS ipex-base-pip
 
 WORKDIR /
@@ -181,13 +182,17 @@ RUN apt-get update && \
     intel-oneapi-runtime-mkl=${MKL_VER} \
     intel-oneapi-runtime-ccl=${CCL_VER};
 
+RUN rm -rf /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list
+
+ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH
+
+FROM ipex-xpu-base AS ipex-xpu-base-wheels
+
 WORKDIR /
 COPY xpu-requirements.txt .
 
 RUN python -m pip install --no-cache-dir -r xpu-requirements.txt && \
-    rm -rf xpu-requirements.txt /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list
-
-ENV LD_LIBRARY_PATH=/opt/intel/oneapi/redist/lib:$LD_LIBRARY_PATH
+    rm -rf xpu-requirements.txt
 
 FROM ipex-xpu-base AS ipex-xpu-jupyter
 
@@ -205,7 +210,8 @@ EXPOSE 8888
 
 CMD ["bash", "-c", "source /etc/bash.bashrc && jupyter notebook --notebook-dir=/jupyter --port 8888 --ip 0.0.0.0 --no-browser --allow-root --ServerApp.token= --ServerApp.password= --ServerApp.allow_origin=* --ServerApp.base_url=$NB_PREFIX"]
 
-FROM ${PYTHON_BASE} as torchserve-base
+
+FROM ${TORCHSERVE_BASE} as torchserve-base
 
 ENV PYTHONUNBUFFERED=TRUE
 
@@ -221,8 +227,6 @@ RUN useradd -m -s /bin/bash model-server && \
     mkdir -p /home/model-server/model-store && \
     chown -R model-server /home/model-server/
 
-FROM torchserve-base AS compile
-
 RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
     g++ \
     git \
@@ -237,11 +241,6 @@ RUN python3 -m venv /home/venv
 ENV PATH="/home/venv/bin:$PATH"
 
 WORKDIR /home/model-server
-COPY torchserve-requirements.txt .
-COPY requirements.txt .
-
-RUN python -m pip install --no-cache-dir -r requirements.txt && \
-    python -m pip install --no-cache-dir -r torchserve-requirements.txt
 
 RUN echo -e "#!/bin/bash \n\
 set -e \n\
@@ -253,13 +252,29 @@ else \n\
 fi \n\
 tail -f /dev/null" >> /usr/local/bin/dockerd-entrypoint.sh
 
-FROM torchserve-base AS torchserve
+FROM torchserve-base AS compile-cpu
+
+COPY serving/torchserve-requirements.txt .
+COPY requirements.txt .
+
+RUN python -m pip install --no-cache-dir -r requirements.txt && \
+    python -m pip install --no-cache-dir -r torchserve-requirements.txt && \
+    rm -rf requirements.txt torchserve-requirements.txt
+
+FROM torchserve-base AS compile-xpu
+
+COPY serving/torchserve-xpu-requirements.txt .
+
+RUN python -m pip install --no-cache-dir -r torchserve-xpu-requirements.txt && \
+    rm -rf torchserve-xpu-requirements.txt
+
+FROM torchserve-base AS torchserve-cpu
 
 USER model-server
 WORKDIR /home/model-server
 
-COPY --chown=model-server --from=compile /home/venv /home/venv
-COPY --chown=model-server --chmod=755 --from=compile /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
+COPY --chown=model-server --from=compile-cpu /home/venv /home/venv
+COPY --chown=model-server --chmod=755 --from=compile-cpu /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
 COPY --chown=model-server serving/config.properties /home/model-server/config.properties
 
 ENV PATH="/home/venv/bin:$PATH"
@@ -270,3 +285,64 @@ EXPOSE 8080 8081 8082 7070 7071
 
 ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
 CMD ["serve"]
+
+FROM torchserve-base AS torchserve-xpu
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends --fix-missing \
+    gnupg2 \
+    gpg-agent \
+    rsync && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN wget -qO - https://repositories.intel.com/gpu/intel-graphics.key | \
+    gpg --dearmor --yes --output /usr/share/keyrings/intel-graphics.gpg
+RUN echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy unified" | \
+    tee /etc/apt/sources.list.d/intel-gpu-jammy.list
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    jq \
+    curl \
+    libnl-genl-3-200 \
+    intel-gsc \
+    libdrm2 \
+    intel-metrics-discovery \
+    intel-metrics-library && \
+    apt-get autoremove -y && \
+    rm -rf /var/lib/apt/lists/*
+
+ARG XPU_SMI_VERSION
+
+ARG API_URL=https://api.github.com/repos/intel/xpumanager/releases/tags/V${XPU_SMI_VERSION}
+
+RUN wget -q --header="Accept: application/vnd.github.v3+json" --header="User-Agent: MyClient/1.0.0" -O - "$API_URL" | tee /tmp/asset_data.txt && \
+    wget -q --no-check-certificate "$(jq -r '.assets[] | select(.name | test("^xpu-smi.*u22\\.04_amd64\\.deb$")) | .browser_download_url' < /tmp/asset_data.txt)" && \
+    ldconfig && dpkg -i --force-all -- *.deb && \
+    rm -rf -- *.deb /etc/apt/sources.list.d/intel-gpu-jammy.list /etc/apt/sources.list.d/oneAPI.list /tmp/asset_data.txt
+
+ARG GID=109
+
+RUN groupadd -g ${GID} render &&\
+    usermod -aG video,render model-server
+
+USER model-server
+
+WORKDIR /home/model-server
+
+RUN wget --progress=dot:giga https://raw.githubusercontent.com/pytorch/serve/master/examples/intel_extension_for_pytorch/intel_gpu_metric_collector.py && \
+    wget --progress=dot:giga https://raw.githubusercontent.com/pytorch/serve/master/examples/intel_extension_for_pytorch/intel_gpu.py
+
+COPY --chown=model-server --from=compile-xpu /home/venv /home/venv
+COPY --chown=model-server --chmod=755 --from=compile-xpu /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
+COPY --chown=model-server serving/config-xpu.properties /home/model-server/config.properties
+
+ENV PATH="/home/venv/bin:$PATH"
+ENV TEMP=/home/model-server/tmp
+
+# 8080/8081/8082 REST and 7070/7071 gRPC
+EXPOSE 8080 8081 8082 7070 7071
+
+ENTRYPOINT ["/usr/local/bin/dockerd-entrypoint.sh"]
+CMD ["serve"]
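
The xpu-smi asset lookup in the torchserve-xpu stage can be checked outside the build; a minimal sketch of the same GitHub API query (requires jq and network access; the version is a placeholder):

```bash
XPU_SMI_VERSION=1.2.38
API_URL=https://api.github.com/repos/intel/xpumanager/releases/tags/V${XPU_SMI_VERSION}
# Print the .deb asset URL the Dockerfile would download for Ubuntu 22.04.
wget -q --header="Accept: application/vnd.github.v3+json" -O - "$API_URL" | \
    jq -r '.assets[] | select(.name | test("^xpu-smi.*u22\\.04_amd64\\.deb$")) | .browser_download_url'
```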

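As a sanity check of the new stage wiring, the XPU serving target can also be built directly. CI supplies all of these args through docker-compose, so every value below is a placeholder, and the python base image referenced by PYTHON_BASE is assumed to exist already:

```bash
# Hypothetical direct build of the new torchserve-xpu target.
# TORCHSERVE_BASE=ipex-xpu-base redirects torchserve-base onto the XPU stack;
# leaving it unset keeps the CPU path (TORCHSERVE_BASE defaults to PYTHON_BASE).
docker build pytorch/ -f pytorch/Dockerfile \
    --target torchserve-xpu \
    --build-arg REGISTRY=localhost --build-arg REPO=ipex \
    --build-arg GITHUB_RUN_NUMBER=0 \
    --build-arg BASE_IMAGE_NAME=ubuntu --build-arg BASE_IMAGE_TAG=22.04 \
    --build-arg PACKAGE_OPTION=pip --build-arg PYTHON_VERSION=3.10 \
    --build-arg TORCHSERVE_BASE=ipex-xpu-base \
    --build-arg XPU_SMI_VERSION=1.2.38 \
    -t ipex-torchserve-xpu:dev
```
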
pytorch/docker-compose.yaml

+26 −5

@@ -122,7 +122,7 @@ services:
         org.opencontainers.base.name: "intel/python:3.10-core"
         org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Base Image"
         org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-xpu-${PACKAGE_OPTION:-pip}-base
-      target: ipex-xpu-base
+      target: ipex-xpu-base-wheels
     command: >
       python -c "import torch;print(torch.device('xpu'));import
       intel_extension_for_pytorch as
@@ -156,7 +156,7 @@ services:
     image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-${PACKAGE_OPTION:-pip}-py${PYTHON_VERSION:-3.10}-ipex-${IPEX_VERSION:-2.1.40xpu}-xpu-jupyter
     ports:
       - 8888:8888
-  torchserve:
+  torchserve-cpu:
     build:
       args:
         PACKAGE_OPTION: pip
@@ -165,22 +165,43 @@ services:
         dependency.apt.openjdk-17-jdk: true
         dependency.idp: false
         dependency.python.ipex: requirements.txt
-        dependency.python.pip: torchserve-requirements.txt
+        dependency.python.pip: serving/torchserve-requirements.txt
        docs: serving
         org.opencontainers.base.name: "intel/python:3.10-core"
         org.opencontainers.image.title: "Intel® Extension for PyTorch Serving Image"
         org.opencontainers.image.version: ${IPEX_VERSION:-2.4.0}-serving-cpu
-      target: torchserve
+      target: torchserve-cpu
     command: torchserve --version
     entrypoint: ""
     extends: ipex-base
-    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve
+    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-cpu
     ports:
       - 8080:8080
       - 8081:8081
       - 8082:8082
       - 7070:7070
       - 7071:7071
+  torchserve-xpu:
+    build:
+      args:
+        PACKAGE_OPTION: pip
+        XPU_SMI_VERSION: ${XPU_SMI_VERSION:-1.2.38}
+        TORCHSERVE_BASE: ipex-xpu-base
+      labels:
+        dependency.apt.numactl: true
+        dependency.apt.openjdk-17-jdk: true
+        dependency.apt.xpu-smi: ${XPU_SMI_VERSION:-1.2.38}
+        dependency.idp: false
+        dependency.python.pip: serving/torchserve-xpu-requirements.txt
+        docs: serving
+        org.opencontainers.base.name: "intel/python:3.10-core"
+        org.opencontainers.image.title: "Intel® Extension for PyTorch XPU Serving Image"
+        org.opencontainers.image.version: ${IPEX_VERSION:-2.1.40}-serving-xpu
+      target: torchserve-xpu
+    command: torchserve --version
+    entrypoint: ""
+    extends: xpu
+    image: ${REGISTRY}/${REPO}:b-${GITHUB_RUN_NUMBER:-0}-${BASE_IMAGE_NAME:-ubuntu}-${BASE_IMAGE_TAG:-22.04}-py${PYTHON_VERSION:-3.10}-torchserve-xpu
   hf-genai:
     build:
       args:

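Assuming the same environment variables CI exports, the new compose service can be exercised locally (registry values below are placeholders):

```bash
cd pytorch
# Build only the new XPU serving service defined above.
REGISTRY=localhost REPO=ipex docker compose build torchserve-xpu
# Smoke test: the service command prints the TorchServe version and exits.
REGISTRY=localhost REPO=ipex docker compose run --rm torchserve-xpu
```
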
pytorch/serving/README.md

+50 −6

@@ -12,29 +12,73 @@ The [Torchserve Model Archiver](https://github.com/pytorch/serve/blob/master/mod
 
 Follow the instructions found in the link above depending on whether you are intending to archive a model or a workflow. Use the provided container rather than installing the archiver with the example command below:
 
+#### Create a Model Archive for CPU device
+
 ```bash
 curl -O https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth
 docker run --rm -it \
+    --entrypoint='' \
+    -u root \
     -v $PWD:/home/model-server \
     intel/intel-optimized-pytorch:2.4.0-serving-cpu \
-    torch-model-archiver --model-name squeezenet \
-    --version 1.0 \
-    --model-file model-archive/model.py \
-    --serialized-file squeezenet1_1-b8a52dc0.pth \
-    --handler image_classifier \
-    --export-path /home/model-server
+    torch-model-archiver --model-name squeezenet1_1 \
+    --version 1.1 \
+    --model-file model-archive/model.py \
+    --serialized-file squeezenet1_1-b8a52dc0.pth \
+    --handler image_classifier \
+    --export-path /home/model-server
+```
+
+#### Create a Model Archive for XPU device
+
+Use a squeezenet model [optimized](./model-archive/ipex_squeezenet.py) for XPU using Intel® Extension for PyTorch*.
+
+```bash
+docker run --rm -it \
+    --entrypoint='' \
+    -u root \
+    -v $PWD:/home/model-server \
+    --device /dev/dri \
+    intel/intel-optimized-pytorch:2.1.40-serving-xpu \
+    sh -c 'python model-archive/ipex_squeezenet.py && \
+    torch-model-archiver --model-name squeezenet1_1 \
+    --version 1.1 \
+    --serialized-file squeezenet1_1-jit.pt \
+    --handler image_classifier \
+    --export-path /home/model-server'
 ```
 
 ### Test Model
 
 Test Torchserve with the new archived model. The example below is for the squeezenet model.
 
+#### Run Torchserve for CPU device
+
 ```bash
 # Assuming that the above pre-archived model is in the current working directory
 docker run -d --rm --name server \
     -v $PWD:/home/model-server/model-store \
+    -v $PWD/wf-store:/home/model-server/wf-store \
     --net=host \
     intel/intel-optimized-pytorch:2.4.0-serving-cpu
+```
+
+#### Run Torchserve for XPU device
+
+```bash
+# Assuming that the above pre-archived model is in the current working directory
+docker run -d --rm --name server \
+    -v $PWD:/home/model-server/model-store \
+    -v $PWD/wf-store:/home/model-server/wf-store \
+    -v $PWD/config-xpu.properties:/home/model-server/config.properties \
+    --net=host \
+    --device /dev/dri \
+    intel/intel-optimized-pytorch:2.1.40-serving-xpu
+```
+
+After launching the container, follow the steps below:
+
+```bash
 # Verify that the container has launched successfully
 docker logs server
 # Attempt to register the model and make an inference request

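The registration and inference steps that follow in the README use TorchServe's standard REST endpoints; a minimal sketch, assuming the server from the step above is running with --net=host and squeezenet1_1.mar is in the mounted model store (the sample image URL is an assumption, not part of this commit):

```bash
# Register the archive with one initial worker via the management API (port 8081).
curl -X POST "http://localhost:8081/models?url=squeezenet1_1.mar&initial_workers=1"
# Fetch a sample image and request a prediction via the inference API (port 8080).
curl -O https://raw.githubusercontent.com/pytorch/serve/master/docs/images/kitten_small.jpg
curl -X POST http://localhost:8080/predictions/squeezenet1_1 -T kitten_small.jpg
```
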
pytorch/serving/config-xpu.properties

+15

@@ -0,0 +1,15 @@
+inference_address=http://0.0.0.0:8080
+management_address=http://0.0.0.0:8081
+metrics_address=http://0.0.0.0:8082
+number_of_netty_threads=32
+install_py_dep_per_model=true
+job_queue_size=1000
+model_store=/home/model-server/model-store
+workflow_store=/home/model-server/wf-store
+allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.*
+ipex_enable=true
+ipex_gpu_enable=true
+system_metrics_cmd=/home/model-server/intel_gpu_metric_collector.py --gpu 1
+disable_token_authorization=true
+enable_model_api=true
+enable_envvars_config=true
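
Since enable_envvars_config=true, TorchServe should also accept TS_-prefixed environment variables in place of edits to this file; a hedged sketch (the override values here are arbitrary):

```bash
# Hypothetical run-time overrides of two properties from config-xpu.properties.
docker run -d --rm --name server \
    --net=host \
    --device /dev/dri \
    -e TS_JOB_QUEUE_SIZE=500 \
    -e TS_NUMBER_OF_NETTY_THREADS=16 \
    intel/intel-optimized-pytorch:2.1.40-serving-xpu
```
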
pytorch/serving/model-archive/ipex_squeezenet.py

+57

@@ -0,0 +1,57 @@
1+
# Copyright (c) 2024 Intel Corporation
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
#
16+
# This file was assembled from multiple pieces, whose use is documented
17+
# throughout. Please refer to the TensorFlow dockerfiles documentation
18+
# for more information.
19+
# based on https://github.com/pytorch/pytorch/blob/master/Dockerfile
20+
#
21+
# NOTE: To build this you will need a docker version >= 19.03 and DOCKER_BUILDKIT=1
22+
#
23+
# If you do not use buildkit you are not going to have a good time
24+
#
25+
# For reference:
26+
# https://docs.docker.com/develop/develop-images/build_enhancements/
27+
28+
# pylint: skip-file
29+
30+
import intel_extension_for_pytorch as ipex
31+
import torch
32+
import torchvision.models as models
33+
34+
# load the model
35+
model = models.squeezenet1_1(pretrained=True)
36+
model = model.eval()
37+
38+
# define dummy input tensor to use for the model's forward call to record operations in the model for tracing
39+
N, C, H, W = 1, 3, 224, 224
40+
data = torch.randn(N, C, H, W)
41+
42+
model.eval()
43+
data = torch.rand(1, 3, 224, 224)
44+
45+
#################### code changes #################
46+
model = model.to("xpu")
47+
data = data.to("xpu")
48+
model = ipex.optimize(model, dtype=torch.bfloat16)
49+
#################### code changes #################
50+
51+
with torch.no_grad():
52+
with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
53+
############################# code changes #####################
54+
model = torch.jit.trace(model, data)
55+
model = torch.jit.freeze(model)
56+
model(data)
57+
torch.jit.save(model, "squeezenet1_1-jit.pt")

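Before archiving, the traced artifact can be smoke-tested in the same environment; a minimal sketch assuming an XPU device and the IPEX wheel are present:

```bash
python - <<'EOF'
# Load the frozen TorchScript module saved by ipex_squeezenet.py and run one batch.
import torch
import intel_extension_for_pytorch  # noqa: F401 -- registers the 'xpu' device

model = torch.jit.load("squeezenet1_1-jit.pt")
data = torch.rand(1, 3, 224, 224).to("xpu")
with torch.no_grad():
    out = model(data)
print(out.shape)  # expect torch.Size([1, 1000]) for squeezenet1_1
EOF
```
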
pytorch/serving/model-archive/mar-test.sh

+13 −3

@@ -26,8 +26,18 @@
 # For reference:
 # https://docs.docker.com/develop/develop-images/build_enhancements/
 
-wget https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth
-torch-model-archiver --model-name squeezenet1_1 --version 1.1 --model-file /home/model-server/model-archive/model.py --serialized-file squeezenet1_1-b8a52dc0.pth --handler image_classifier --export-path /home/model-server/model-store
+if [[ "$1" == "cpu" ]]; then
+    wget https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth
+    torch-model-archiver --model-name squeezenet1_1 --version 1.1 --model-file /home/model-server/model-archive/model.py --serialized-file squeezenet1_1-b8a52dc0.pth --handler image_classifier --export-path /home/model-server/model-store
+    rm -rf squeezenet1_1-b8a52dc0.pth
+elif [[ "$1" == "xpu" ]]; then
+    python /home/model-server/model-archive/ipex_squeezenet.py
+    torch-model-archiver --model-name squeezenet1_1 --version 1.1 --serialized-file squeezenet1_1-jit.pt --handler image_classifier --export-path /home/model-server/model-store
+    rm -rf squeezenet1_1-jit.pt
+else
+    echo "Only cpu and xpu devices supported"
+    exit 1
+fi
+
 [ -f "/home/model-server/model-store/squeezenet1_1.mar" ] && echo "squeezenet1_1.pth Archived Successfully at /home/model-server/model-store/squeezenet1_1.mar"
-rm -rf squeezenet1_1-b8a52dc0.pth
 find . | grep -E "(/__pycache__$|\.pyc$|\.pyo$)" | xargs rm -rf

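The script presumably runs inside the serving images during CI; a hedged local invocation for the CPU path (image tag and mount layout taken from the README above):

```bash
docker run --rm -it --entrypoint='' -u root \
    -v $PWD:/home/model-server \
    intel/intel-optimized-pytorch:2.4.0-serving-cpu \
    bash /home/model-server/model-archive/mar-test.sh cpu
```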