v0.3.5 #14

Merged (22 commits) on Apr 10, 2024

Add KServe Documentation for Frameworks (#196)
* add validated torchserve kserve support

* add tf

* add link to patch

* remove annotations

* Update patch.yaml

* fix tips
Tyler Titsworth authored Feb 26, 2024
commit 1b7bd98fc42eaafabac4c8b207e36c8defff7fca
57 changes: 57 additions & 0 deletions pytorch/serving/README.md
@@ -162,3 +162,60 @@ There are some additional steps that can be taken to prepare your service for yo
- Integrate an [Ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) to your service to serve to a hostname rather than an ip address.
- Integrate [MLFlow](https://github.com/mlflow/mlflow-torchserve).
- Integrate an [SSL Certificate](https://pytorch.org/serve/configuration.html#enable-ssl) in your model config file to serve models securely.

### KServe

Apply Intel optimizations to KServe by patching the serving runtimes to use Intel-optimized serving containers with `kubectl apply -f patch.yaml`.

> [!NOTE]
> You can modify this `patch.yaml` file to change the serving runtime pod configuration.
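
For reference, a patch of this kind typically overrides the runtime's container image. The sketch below is illustrative only; the `patch.yaml` in this repository is authoritative, and the image name here is a placeholder:

```yaml
# Illustrative sketch of a KServe serving-runtime override (not the repo's patch.yaml)
apiVersion: serving.kserve.io/v1alpha1
kind: ClusterServingRuntime
metadata:
  name: kserve-torchserve
spec:
  supportedModelFormats:
    - name: pytorch
      version: "1"
      autoSelect: true
  containers:
    - name: kserve-container
      image: intel/torchserve:latest-kfs  # placeholder: Intel-optimized serving image
```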

#### Create an Endpoint

1. Create a volume with the following file structure:

```text
my-volume
├── config
│   └── config.properties
└── model-store
    └── my-model.mar
```
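
A minimal sketch of assembling this layout locally before copying it into the volume (paths and file names match the tree above; how you populate the PVC depends on your cluster):

```shell
# Assemble the expected volume layout locally (hypothetical placeholder files;
# substitute your real config.properties and model archive).
mkdir -p my-volume/config my-volume/model-store
touch my-volume/config/config.properties   # TorchServe config from step 2
touch my-volume/model-store/my-model.mar   # your model archive
```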

2. Modify your TorchServe server configuration (`config.properties`) with the following:

```text
...
enable_metrics_api=true
metrics_mode=prometheus
model_store=/mnt/models/model-store
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"my-model":{"1.0":{"defaultVersion":true,"marName":"my-model.mar"}}}}
```
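
The `model_snapshot` value must be a single-line JSON object, which is easy to get wrong by hand. A small helper like the following (hypothetical, not part of the repository) can generate it:

```python
import json

def model_snapshot(model_name: str, mar_name: str, version: str = "1.0") -> str:
    """Build the single-line model_snapshot JSON for config.properties."""
    snapshot = {
        "name": "startup.cfg",
        "modelCount": 1,
        "models": {model_name: {version: {"defaultVersion": True, "marName": mar_name}}},
    }
    # Compact separators produce the single-line form TorchServe expects.
    return json.dumps(snapshot, separators=(",", ":"))

print("model_snapshot=" + model_snapshot("my-model", "my-model.mar"))
```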

3. Create a new `InferenceService` endpoint:

```yaml
apiVersion: "serving.kserve.io/v1beta1"
kind: "InferenceService"
metadata:
  name: "ipex-torchserve-sample"
spec:
  predictor:
    model:
      modelFormat:
        name: pytorch
      protocolVersion: v2
      storageUri: pvc://my-volume
```

4. Test the endpoint:

```bash
curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models
```

> [!TIP]
> You can find your `SERVICE_HOSTNAME` in the KubeFlow UI: copy the endpoint URL and remove the `http://` prefix.

> [!TIP]
> You can find your ingress information with `kubectl get svc -n istio-system | grep istio-ingressgateway`, using the external IP and the port mapped to `80`.
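
The tips above can be scripted with the standard Istio ingress lookup. This is a sketch that assumes a `LoadBalancer` service named `istio-ingressgateway` and falls back to placeholder values when no cluster is reachable:

```shell
# Resolve the ingress address for the curl test in step 4.
# Falls back to placeholders if kubectl or the cluster is unavailable.
INGRESS_HOST="$(kubectl -n istio-system get service istio-ingressgateway \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}' 2>/dev/null || true)"
INGRESS_HOST="${INGRESS_HOST:-127.0.0.1}"
INGRESS_PORT="$(kubectl -n istio-system get service istio-ingressgateway \
  -o jsonpath='{.spec.ports[?(@.name=="http2")].port}' 2>/dev/null || true)"
INGRESS_PORT="${INGRESS_PORT:-80}"
echo "http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models"
```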
3 changes: 2 additions & 1 deletion pytorch/serving/build-kfs.sh
@@ -2,9 +2,10 @@

cd .. || exit
docker compose pull torchserve
docker tag "$(docker images -q | head -n1)" intel/torchserve:latest-kfs
docker tag "$(docker images -q | head -n1)" intel/torchserve:latest
git clone https://github.com/pytorch/serve
cd serve/kubernetes/kserve || exit
git apply ../../../serving/kfs.patch
git submodule update --init --recursive
./build_image.sh
rm -rf serve/
11 changes: 7 additions & 4 deletions pytorch/serving/kfs.patch
@@ -1,5 +1,5 @@
diff --git a/kubernetes/kserve/build_image.sh b/kubernetes/kserve/build_image.sh
index b644dc37..88512841 100755
index b644dc37..200a431d 100755
--- a/kubernetes/kserve/build_image.sh
+++ b/kubernetes/kserve/build_image.sh
@@ -1,8 +1,8 @@
@@ -20,12 +20,15 @@ index b644dc37..88512841 100755
-DOCKER_BUILDKIT=1 docker build --file "$DOCKER_FILE" --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" .
+DOCKER_BUILDKIT=1 docker build --file "$DOCKER_FILE" --build-arg http_proxy=${http_proxy} --build-arg https_proxy=${https_proxy} --build-arg BASE_IMAGE=$BASE_IMAGE -t "$DOCKER_TAG" .
diff --git a/kubernetes/kserve/config.properties b/kubernetes/kserve/config.properties
index 422e53d1..93514c21 100644
index 422e53d1..23b35123 100644
--- a/kubernetes/kserve/config.properties
+++ b/kubernetes/kserve/config.properties
@@ -14,3 +14,7 @@ job_queue_size=10
@@ -12,5 +12,9 @@ NUM_WORKERS=1
number_of_netty_threads=4
job_queue_size=10
load_models=all
model_store=/home/model-server/shared/model-store
-model_store=/home/model-server/shared/model-store
+model_store=/mnt/models/model-store
model_snapshot={"name":"startup.cfg","modelCount":1,"models":{"mnist":{"1.0":{"defaultVersion":true,"marName":"mnist.mar","minWorkers":1,"maxWorkers":5,"batchSize":5,"maxBatchDelay":200,"responseTimeout":60}}}}
+allowed_urls=https://s3.amazonaws.com/.*,https://torchserve.pytorch.org/.*
+ipex_enable=true